| author | 2017-05-10 14:04:18 +0000 |
|---|---|
| committer | 2017-05-10 14:04:19 +0000 |
| commit | 270970e660d3c99e62a88b18144d159dd8699c55 (patch) |
| tree | 910248487abab475a9109925a9089fedd08b45c1 /compiler/optimizing |
| parent | 201c81947f23b3f43c2ff78d9c1078dd53157fe5 (diff) |
| parent | 88abba2b0cb0151d89e16da3e64025878dc2f142 (diff) |
Merge "ARM/AOT: Allow 16-bit LDR for Baker read barrier loads."
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 50 |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 4 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 56 |

3 files changed, 70 insertions, 40 deletions
```diff
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f5f40fc686..cf2a391e8f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -90,13 +90,17 @@ static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instru
 }
 
 static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
-  DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+  ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
   __ BindTrackedLabel(bne_label);
   Label placeholder_label;
   __ b(&placeholder_label, NE);  // Placeholder, patched at link-time.
   __ Bind(&placeholder_label);
 }
 
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+  return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
 static constexpr int kRegListThreshold = 4;
 
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -8057,8 +8061,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
       //   return_address:
 
       CheckLastTempIsBakerCcEntrypointRegister(instruction);
+      bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
       uint32_t custom_data =
-          linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+          linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
       Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
       // entrypoint_reg =
@@ -8071,16 +8076,18 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruct
       Label return_address;
       __ AdrCode(LR, &return_address);
       __ CmpConstant(kBakerCcEntrypointRegister, 0);
-      static_assert(
-          BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
-          "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
       // Currently the offset is always within range. If that changes,
       // we shall have to split the load the same way as for fields.
       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
-      ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+      DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+      ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+      int old_position = GetAssembler()->GetBuffer()->GetPosition();
       __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
       EmitPlaceholderBne(codegen_, bne_label);
       __ Bind(&return_address);
+      DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+                narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+                       : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
     } else {
       // Note that we do not actually check the value of
       // `GetIsGcMarking()` to decide whether to mark the loaded GC
```
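A note on the new `CanEmitNarrowLdr` predicate above: the 16-bit Thumb LDR (immediate) encoding T1 only reaches low registers and small word-aligned immediates, and the patch restricts itself further to offsets below 32. The following standalone sketch (plain C++, not ART code; plain register numbers stand in for `Register` values) illustrates the difference between what the encoding permits and what the patch chooses to use:

```cpp
#include <cassert>
#include <cstdint>

// Standalone sketch, not ART code. The Thumb LDR (immediate) encoding T1 is
// 16 bits wide: it requires Rt and Rn in r0-r7 and a word-aligned offset of
// at most 124 (a 5-bit immediate scaled by 4).
constexpr bool IsLowRegister(uint32_t reg) { return reg < 8u; }

constexpr bool IsT1Encodable(uint32_t rt, uint32_t rn, uint32_t offset) {
  return IsLowRegister(rt) && IsLowRegister(rn) &&
         offset % 4u == 0u && offset <= 124u;
}

// The predicate the patch actually uses is stricter: offsets must be below
// 32 (see the in-diff comment about overall code size once the generated
// thunks are taken into account).
constexpr bool CanEmitNarrowLdr(uint32_t rt, uint32_t rn, uint32_t offset) {
  return IsLowRegister(rt) && IsLowRegister(rn) && offset < 32u;
}

int main() {
  assert(CanEmitNarrowLdr(0, 1, 8));    // r0 <- [r1, #8]: narrow
  assert(IsT1Encodable(0, 1, 64));      // encodable as T1...
  assert(!CanEmitNarrowLdr(0, 1, 64));  // ...but the patch still emits wide
  assert(!CanEmitNarrowLdr(8, 1, 8));   // r8 is a high register: always wide
  return 0;
}
```

With that predicate in place, the GC root path above threads the `narrow` flag into the thunk key and only forces 32-bit encodings when the narrow form is not used. The diff continues with the field load: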
```diff
@@ -8180,10 +8187,12 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    Register ref_reg = ref.AsRegister<Register>();
+    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
     Register base = obj;
     if (offset >= kReferenceLoadMinFarOffset) {
       base = temp.AsRegister<Register>();
@@ -8191,10 +8200,14 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstr
       static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
       __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
       offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size when taking the generated thunks into account.
+      DCHECK(!narrow);
     }
     CheckLastTempIsBakerCcEntrypointRegister(instruction);
     uint32_t custom_data =
-        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
     Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     // entrypoint_reg =
@@ -8207,19 +8220,20 @@
     Label return_address;
     __ AdrCode(LR, &return_address);
     __ CmpConstant(kBakerCcEntrypointRegister, 0);
-    ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
     EmitPlaceholderBne(this, bne_label);
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
-                  " 2 32-bit instructions (8B) for heap poisoning.");
-    Register ref_reg = ref.AsRegister<Register>();
     DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+    DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+    ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+    int old_position = GetAssembler()->GetBuffer()->GetPosition();
     __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
     if (needs_null_check) {
       MaybeRecordImplicitNullCheck(instruction);
     }
     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
     __ Bind(&return_address);
+    DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     return;
   }
 
@@ -8265,7 +8279,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
    //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = data[index];
+    //     HeapReference<mirror::Object> reference = data[index];
     //   gray_return_address:
 
     DCHECK(index.IsValid());
```
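The field-load path keeps the LDR immediate in range by splitting a large offset into a one-off base adjustment plus a small remainder, and the `DCHECK(!narrow)` records that narrow encodings are deliberately not used on that split path. A minimal sketch of the arithmetic, with an assumed value for `kReferenceLoadMinFarOffset` (the diff only requires it to be a power of two):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Minimal sketch of the far-offset split, not ART code. The value of
// kReferenceLoadMinFarOffset is assumed here for illustration.
constexpr uint32_t kReferenceLoadMinFarOffset = 4u * 1024u;

int main() {
  uint32_t offset = 0x2008u;  // too large for the LDR immediate
  // The high bits are folded into an adjusted base register once...
  uint32_t base_add = offset & ~(kReferenceLoadMinFarOffset - 1u);
  // ...and the LDR keeps only the low bits.
  offset &= (kReferenceLoadMinFarOffset - 1u);
  assert(base_add + offset == 0x2008u);
  printf("base += 0x%x, ldr immediate = 0x%x\n",
         static_cast<unsigned>(base_add), static_cast<unsigned>(offset));
  return 0;
}
```

The array-load epilogue for the non-VIXL back end follows, along with the comment-only fixes on arm64: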
```diff
@@ -8290,15 +8304,15 @@
     Label return_address;
     __ AdrCode(LR, &return_address);
     __ CmpConstant(kBakerCcEntrypointRegister, 0);
-    ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
     EmitPlaceholderBne(this, bne_label);
-    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
-                  " 2 32-bit instructions (8B) for heap poisoning.");
+    ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+    int old_position = GetAssembler()->GetBuffer()->GetPosition();
     __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
     __ Bind(&return_address);
+    DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7d9778a4e7..2af3e3a3ea 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6102,7 +6102,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6197,7 +6197,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = data[index];
+    //     HeapReference<mirror::Object> reference = data[index];
     //   gray_return_address:
 
     DCHECK(index.IsValid());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 3ad2b129b0..9f03a39bd5 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -124,6 +124,10 @@ static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Lab
   __ bind(&placeholder_label);
 }
 
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+  return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
 class EmitAdrCode {
  public:
   EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
@@ -8167,8 +8171,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
 
       UseScratchRegisterScope temps(GetVIXLAssembler());
       ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
-      uint32_t custom_data =
-          linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+      bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+      uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+          root_reg.GetCode(), narrow);
       vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
       // entrypoint_reg =
```
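The `EncodeBakerReadBarrier*Data` helpers live in `linker::Thumb2RelativePatcher` and are not part of this diff, but the reason they gain a `narrow` parameter is visible here: a thunk that resumes after a 16-bit LDR is not interchangeable with one that resumes after a 32-bit LDR, so the LDR width has to be part of the thunk key. A sketch of such a key follows; the bit layout is invented for illustration and is not the actual format:

```cpp
#include <cassert>
#include <cstdint>

// Invented bit layout for illustration; the real packing lives in
// linker::Thumb2RelativePatcher and is not shown in this diff.
enum class BakerKind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };

constexpr uint32_t Encode(BakerKind kind, uint32_t reg, bool narrow) {
  // kind:2 | narrow:1 | reg:5; one thunk is compiled per distinct value.
  return (static_cast<uint32_t>(kind) << 6) |
         (static_cast<uint32_t>(narrow) << 5) |
         reg;
}

int main() {
  // The same root register maps to two different thunks once the LDR width
  // varies, which is part of the code-size cost the heuristic weighs.
  assert(Encode(BakerKind::kGcRoot, 0, true) !=
         Encode(BakerKind::kGcRoot, 0, false));
  return 0;
}
```

The VIXL back end's GC root and field-load changes mirror the non-VIXL ones: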
```diff
@@ -8183,15 +8188,16 @@
       vixl32::Label return_address;
       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
       __ cmp(kBakerCcEntrypointRegister, Operand(0));
-      static_assert(
-          BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
-          "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
       // Currently the offset is always within range. If that changes,
       // we shall have to split the load the same way as for fields.
       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
-      __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
       EmitPlaceholderBne(codegen_, bne_label);
       __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+                       : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
     } else {
       // Note that we do not actually check the value of
       // `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8292,10 +8298,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
     vixl32::Register base = obj;
     if (offset >= kReferenceLoadMinFarOffset) {
       base = RegisterFrom(temp);
@@ -8303,12 +8311,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
       static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
       __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
       offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size when taking the generated thunks into account.
+      DCHECK(!narrow);
     }
     UseScratchRegisterScope temps(GetVIXLAssembler());
     ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
     uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
-        base.GetCode(),
-        obj.GetCode());
+        base.GetCode(), obj.GetCode(), narrow);
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     // entrypoint_reg =
@@ -8325,19 +8336,24 @@
     vixl32::Label return_address;
     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
     __ cmp(kBakerCcEntrypointRegister, Operand(0));
     EmitPlaceholderBne(this, bne_label);
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
-                  " 2 32-bit instructions (8B) for heap poisoning.");
-    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
-    __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
     if (needs_null_check) {
       MaybeRecordImplicitNullCheck(instruction);
     }
-    // Note: We need a Wide NEG for the unpoisoning.
+    // Note: We need a specific width for the unpoisoning NEG.
     if (kPoisonHeapReferences) {
-      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      if (narrow) {
+        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+      } else {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      }
     }
     __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     return;
   }
```
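With the LDR width now chosen per call site, the old compile-time `static_assert`s on `BAKER_MARK_INTROSPECTION_*_LDR_OFFSET` can no longer express the invariant, so the diff verifies the emitted distance at code-generation time with a `DCHECK_EQ` against the buffer cursor. A toy model of that check follows; the buffer is a stand-in rather than VIXL, and the constants follow from the instruction sizes (a 16-bit LDR plus a 32-bit BNE versus two 32-bit instructions), with the authoritative values living in ART's assembly-support headers:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Toy stand-in for the assembler buffer, not VIXL. The constant names mirror
// the diff; the values are what the instruction sizes imply, but the
// authoritative definitions live in ART's headers.
constexpr ptrdiff_t kGcRootLdrWideOffset = -8;    // 32-bit LDR + 32-bit BNE
constexpr ptrdiff_t kGcRootLdrNarrowOffset = -6;  // 16-bit LDR + 32-bit BNE

struct ToyBuffer {
  std::vector<uint8_t> bytes;
  ptrdiff_t GetCursorOffset() const { return static_cast<ptrdiff_t>(bytes.size()); }
  void Emit(size_t n) { bytes.resize(bytes.size() + n); }  // n bytes of code
};

void EmitGcRootLoad(ToyBuffer* buf, bool narrow) {
  ptrdiff_t old_offset = buf->GetCursorOffset();
  buf->Emit(narrow ? 2 : 4);  // the LDR
  buf->Emit(4);               // the placeholder BNE
  // The return-address label is bound here; check the distance back to the LDR,
  // written the same way round as the DCHECK_EQ in the diff.
  assert(old_offset - buf->GetCursorOffset() ==
         (narrow ? kGcRootLdrNarrowOffset : kGcRootLdrWideOffset));
}

int main() {
  ToyBuffer buf;
  EmitGcRootLoad(&buf, /*narrow=*/true);
  EmitGcRootLoad(&buf, /*narrow=*/false);
  return 0;
}
```

The VIXL array-load path closes out the change: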
```diff
@@ -8383,7 +8399,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = data[index];
+    //     HeapReference<mirror::Object> reference = data[index];
     //   gray_return_address:
 
     DCHECK(index.IsValid());
@@ -8413,9 +8429,7 @@
     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
     __ cmp(kBakerCcEntrypointRegister, Operand(0));
     EmitPlaceholderBne(this, bne_label);
-    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
-                  " 2 32-bit instructions (8B) for heap poisoning.");
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
     __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
     // Note: We need a Wide NEG for the unpoisoning.
@@ -8423,6 +8437,8 @@
       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
     }
     __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     return;
   }
 
```
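A final subtlety is the narrow unpoisoning NEG in the VIXL field-load hunk: with `kPoisonHeapReferences`, references are stored negated, and the only 16-bit reverse-subtract available outside an IT block is the flag-setting RSBS (encoding T1). Clobbering the flags there is presumably harmless, since the preceding BNE has already consumed the CMP result. A small sketch of the poisoning convention itself; the negation matches ART's scheme, while the helper names are made up:

```cpp
#include <cassert>
#include <cstdint>

// The negation convention matches ART's heap reference poisoning; the helper
// names are made up for this sketch.
constexpr uint32_t Poison(uint32_t ref) { return 0u - ref; }
constexpr uint32_t Unpoison(uint32_t ref) { return 0u - ref; }  // RSB ref, ref, #0

int main() {
  uint32_t ref = 0x12345678u;
  assert(Unpoison(Poison(ref)) == ref);  // negation is its own inverse
  assert(Poison(0u) == 0u);              // null stays null when poisoned
  return 0;
}
```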