summary | refs | log | tree | commit | diff
path: root/compiler/optimizing
diff options
context:
space:
mode:
author Treehugger Robot <treehugger-gerrit@google.com> 2017-05-10 14:04:18 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-05-10 14:04:19 +0000
commit 270970e660d3c99e62a88b18144d159dd8699c55 (patch)
tree 910248487abab475a9109925a9089fedd08b45c1 /compiler/optimizing
parent 201c81947f23b3f43c2ff78d9c1078dd53157fe5 (diff)
parent 88abba2b0cb0151d89e16da3e64025878dc2f142 (diff)
Merge "ARM/AOT: Allow 16-bit LDR for Baker read barrier loads."
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- compiler/optimizing/code_generator_arm.cc 50
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc 4
-rw-r--r-- compiler/optimizing/code_generator_arm_vixl.cc 56
3 files changed, 70 insertions, 40 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f5f40fc686..cf2a391e8f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -90,13 +90,17 @@ static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instru
}
static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
- DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
__ BindTrackedLabel(bne_label);
Label placeholder_label;
__ b(&placeholder_label, NE); // Placeholder, patched at link-time.
__ Bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -8057,8 +8061,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
// return_address:
CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8071,16 +8076,18 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8180,10 +8187,12 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = temp.AsRegister<Register>();
@@ -8191,10 +8200,14 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
CheckLastTempIsBakerCcEntrypointRegister(instruction);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8207,19 +8220,20 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- Register ref_reg = ref.AsRegister<Register>();
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, ref_reg, base, offset);
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8265,7 +8279,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8290,15 +8304,15 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7d9778a4e7..2af3e3a3ea 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6102,7 +6102,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6197,7 +6197,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 3ad2b129b0..9f03a39bd5 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -124,6 +124,10 @@ static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Lab
__ bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
class EmitAdrCode {
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
@@ -8167,8 +8171,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8183,15 +8188,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8292,10 +8298,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
@@ -8303,12 +8311,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
+ base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8325,19 +8336,24 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
- __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // Note: We need a Wide NEG for the unpoisoning.
+ // Note: We need a specific width for the unpoisoning NEG.
if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8383,7 +8399,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8413,9 +8429,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
// Note: We need a Wide NEG for the unpoisoning.
@@ -8423,6 +8437,8 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}