-rw-r--r--  compiler/optimizing/code_generator_arm64.cc    | 292
-rw-r--r--  compiler/optimizing/code_generator_arm64.h     |   3
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 418
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |   3
4 files changed, 251 insertions(+), 465 deletions(-)
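This change deletes the three kBakerReadBarrierLinkTimeThunksEnable* flags (all hardcoded to true) together with the now-unreachable ReadBarrierMarkSlowPathARM64/ARMVIXL slow paths, leaving the thunk-based fast path as the only implementation for field, array and GC root loads. A minimal, self-contained C++ sketch of the control flow that fast path implements (illustrative names only, not ART code):

    #include <cstdint>

    // Illustrative stand-ins; none of these names are ART's.
    using ObjRef = uint32_t;                        // Compressed heap reference.
    static ObjRef Mark(ObjRef ref) { return ref; }  // Stand-in for ReadBarrier::Mark().

    struct Thread {
      bool is_gc_marking;  // Mirrors the dedicated Marking Register (MR).
    };

    // Shape of the fast path this patch makes unconditional: do the plain
    // load, then branch to the shared thunk only when the GC is marking.
    static ObjRef LoadGcRoot(const Thread* self, const ObjRef* root_addr) {
      ObjRef root = *root_addr;    // Original reference load (the LDR).
      if (self->is_gc_marking) {   // CBNZ mr, <thunk> on arm64; CMP + BNE on arm.
        root = Mark(root);         // The thunk checks and calls the entrypoint.
      }
      return root;
    }

    int main() {
      ObjRef heap_slot = 42;
      Thread self{true};
      return LoadGcRoot(&self, &heap_slot) == 42 ? 0 : 1;
    }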
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 260920cb0c..723446b45f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -89,15 +89,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
 // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
 // the offset explicitly.
 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
 
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
 // Some instructions have special requirements for a temporary, for example
 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
 // temp that's not an R0 (to avoid an extra move) and Baker read barrier field
@@ -680,50 +675,6 @@ class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
 };
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
-                               Location ref,
-                               Location entrypoint = Location::NoLocation())
-      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
-    DCHECK(kEmitCompilerReadBarrier);
-  }
-
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(locations->CanCall());
-    DCHECK(ref_.IsRegister()) << ref_;
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-
-    __ Bind(GetEntryLabel());
-    GenerateReadBarrierMarkRuntimeCall(codegen);
-    __ B(GetExitLabel());
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
-};
-
 // Slow path loading `obj`'s lock word, loading a reference from
 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
 // marking `ref` if `obj` is gray according to the lock word (Baker
@@ -2324,17 +2275,16 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
                                                    : LocationSummary::kNoCall);
   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation() &&
-        !field_info.IsVolatile()) {
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // non-volatile loads we need a temporary only if the offset is too big.
+    if (!field_info.IsVolatile()) {
+      // We need a temporary register for the read barrier load in
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+      // only if the offset is too big.
       if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
         locations->AddTemp(FixedTempLocation());
       }
     } else {
+      // Volatile fields need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -2798,14 +2748,11 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
                                                    : LocationSummary::kNoCall);
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation() &&
-        instruction->GetIndex()->IsConstant()) {
+    if (instruction->GetIndex()->IsConstant()) {
       // Array loads with constant index are treated as field loads.
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // constant index loads we need a temporary only if the offset is too big.
+      // We need a temporary register for the read barrier load in
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+      // only if the offset is too big.
       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
       offset += index << DataType::SizeShift(DataType::Type::kReference);
@@ -2813,6 +2760,8 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
         locations->AddTemp(FixedTempLocation());
       }
     } else {
+      // We need a non-scratch temporary for the array data pointer in
+      // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier().
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -2868,7 +2817,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
     } else {
       Register temp = WRegisterFrom(locations->GetTemp(0));
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
+          out, obj.W(), offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     // General case.
@@ -6276,74 +6225,39 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
   if (kUseBakerReadBarrier) {
     // Fast path implementation of art::ReadBarrier::BarrierForRoot when
     // Baker's read barrier are used.
-    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
-      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-      // the Marking Register) to decide whether we need to enter
-      // the slow path to mark the GC root.
-      //
-      // We use shared thunks for the slow path; shared within the method
-      // for JIT, across methods for AOT. That thunk checks the reference
-      // and jumps to the entrypoint if needed.
-      //
-      //     lr = &return_address;
-      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-      //       goto gc_root_thunk<root_reg>(lr)
-      //     }
-      //   return_address:
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      DCHECK(temps.IsAvailable(ip0));
-      DCHECK(temps.IsAvailable(ip1));
-      temps.Exclude(ip0, ip1);
-      uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
-
-      ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
-      vixl::aarch64::Label return_address;
-      __ adr(lr, &return_address);
-      if (fixup_label != nullptr) {
-        __ bind(fixup_label);
-      }
-      static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
-                    "GC root LDR must be 2 instruction (8B) before the return address label.");
-      __ ldr(root_reg, MemOperand(obj.X(), offset));
-      EmitBakerReadBarrierCbnz(custom_data);
-      __ bind(&return_address);
-    } else {
-      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-      // the Marking Register) to decide whether we need to enter
-      // the slow path to mark the GC root.
-      //
-      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-      //       // Slow path.
-      //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //       root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
-      //     }
-
-      // Slow path marking the GC root `root`. The entrypoint will
-      // be loaded by the slow path code.
-      SlowPathCodeARM64* slow_path =
-          new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
-      AddSlowPath(slow_path);
-
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      if (fixup_label == nullptr) {
-        __ Ldr(root_reg, MemOperand(obj, offset));
-      } else {
-        EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
-      }
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
-
-      __ Cbnz(mr, slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
+    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+    // the Marking Register) to decide whether we need to enter
+    // the slow path to mark the GC root.
+    //
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the reference
+    // and jumps to the entrypoint if needed.
+    //
+    //     lr = &return_address;
+    //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+    //       goto gc_root_thunk<root_reg>(lr)
+    //     }
+    //   return_address:
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    DCHECK(temps.IsAvailable(ip0));
+    DCHECK(temps.IsAvailable(ip1));
+    temps.Exclude(ip0, ip1);
+    uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+
+    ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    if (fixup_label != nullptr) {
+      __ bind(fixup_label);
     }
+    static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+                  "GC root LDR must be 2 instruction (8B) before the return address label.");
+    __ ldr(root_reg, MemOperand(obj.X(), offset));
+    EmitBakerReadBarrierCbnz(custom_data);
+    __ bind(&return_address);
   } else {
     // GC root loaded through a slow path for read barriers other
     // than Baker's.
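The static_assert in the hunk above pins the distance between the LDR and the return address that the thunk uses to locate the load. A small self-contained sketch of that layout arithmetic, assuming fixed 4-byte A64 instructions (the constant mirrors BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET; everything else is illustrative):

    #include <cassert>

    constexpr int kA64InstructionSize = 4;
    constexpr int kGcRootLdrOffset = -8;  // BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET

    int main() {
      // Emitted sequence:
      //   adr  lr, return_address
      //   ldr  root_reg, [obj, #offset]
      //   cbnz mr, <gc_root_thunk>      // via EmitBakerReadBarrierCbnz(custom_data)
      // return_address:
      int adr_pc = 0;
      int ldr_pc = adr_pc + kA64InstructionSize;
      int return_address = ldr_pc + 2 * kA64InstructionSize;
      // The thunk finds the LDR two instructions (8 bytes) before the return address.
      assert(ldr_pc - return_address == kGcRootLdrOffset);
      return 0;
    }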
@@ -6380,7 +6294,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire) {
+  if (!use_load_acquire) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
@@ -6457,8 +6371,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
                                               use_load_acquire);
 }
 
-void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                               Location ref,
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref,
                                                                Register obj,
                                                                uint32_t data_offset,
                                                                Location index,
@@ -6472,76 +6385,57 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
-    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
-    // Marking Register) to decide whether we need to enter the slow
-    // path to mark the reference. Then, in the slow path, check the
-    // gray bit in the lock word of the reference's holder (`obj`) to
-    // decide whether to mark `ref` or not.
-    //
-    // We use shared thunks for the slow path; shared within the method
-    // for JIT, across methods for AOT. That thunk checks the holder
-    // and jumps to the entrypoint if needed. If the holder is not gray,
-    // it creates a fake dependency and returns to the LDR instruction.
-    //
-    //     lr = &gray_return_address;
-    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-    //       goto array_thunk<base_reg>(lr)
-    //     }
-    //   not_gray_return_address:
-    //     // Original reference load. If the offset is too large to fit
-    //     // into LDR, we use an adjusted base register here.
-    //     HeapReference<mirror::Object> reference = data[index];
-    //   gray_return_address:
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to mark the reference. Then, in the slow path, check the
+  // gray bit in the lock word of the reference's holder (`obj`) to
+  // decide whether to mark `ref` or not.
+  //
+  // We use shared thunks for the slow path; shared within the method
+  // for JIT, across methods for AOT. That thunk checks the holder
+  // and jumps to the entrypoint if needed. If the holder is not gray,
+  // it creates a fake dependency and returns to the LDR instruction.
+  //
+  //     lr = &gray_return_address;
+  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+  //       goto array_thunk<base_reg>(lr)
+  //     }
+  //   not_gray_return_address:
+  //     // Original reference load. If the offset is too large to fit
+  //     // into LDR, we use an adjusted base register here.
+  //     HeapReference<mirror::Object> reference = data[index];
+  //   gray_return_address:
 
-    DCHECK(index.IsValid());
-    Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
-    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+  DCHECK(index.IsValid());
+  Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+  Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
 
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    DCHECK(temps.IsAvailable(ip0));
-    DCHECK(temps.IsAvailable(ip1));
-    temps.Exclude(ip0, ip1);
-    uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  DCHECK(temps.IsAvailable(ip0));
+  DCHECK(temps.IsAvailable(ip1));
+  temps.Exclude(ip0, ip1);
+  uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
 
-    __ Add(temp.X(), obj.X(), Operand(data_offset));
-    {
-      ExactAssemblyScope guard(GetVIXLAssembler(),
-                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-      vixl::aarch64::Label return_address;
-      __ adr(lr, &return_address);
-      EmitBakerReadBarrierCbnz(custom_data);
-      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                    "Array LDR must be 1 instruction (4B) before the return address label; "
-                    " 2 instructions (8B) for heap poisoning.");
-      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
-      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-      // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
-      // macro instructions disallowed in ExactAssemblyScope.
-      if (kPoisonHeapReferences) {
-        __ neg(ref_reg, Operand(ref_reg));
-      }
-      __ bind(&return_address);
+  __ Add(temp.X(), obj.X(), Operand(data_offset));
+  {
+    ExactAssemblyScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    EmitBakerReadBarrierCbnz(custom_data);
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+    // macro instructions disallowed in ExactAssemblyScope.
+    if (kPoisonHeapReferences) {
+      __ neg(ref_reg, Operand(ref_reg));
     }
-    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
-    return;
+    __ bind(&return_address);
   }
-
-  // Array cells are never volatile variables, therefore array loads
-  // never use Load-Acquire instructions on ARM64.
-  const bool use_load_acquire = false;
-
-  // /* HeapReference<Object> */ ref =
-  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(instruction,
-                                            ref,
-                                            obj,
-                                            data_offset,
-                                            index,
-                                            scale_factor,
-                                            temp,
-                                            needs_null_check,
-                                            use_load_acquire);
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
 }
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c07d1eaf95..5aeb0b4034 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -672,8 +672,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
                                              bool use_load_acquire);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
-  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location ref,
+  void GenerateArrayLoadWithBakerReadBarrier(Location ref,
                                              vixl::aarch64::Register obj,
                                              uint32_t data_offset,
                                              Location index,
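Both VisitArrayGet hunks fold constant-index array loads into the field-load path, so a temp is only reserved for far offsets. A hedged sketch of that decision (the constants here are illustrative; the real values come from the code above):

    #include <cstdint>

    constexpr uint32_t KB = 1024;
    constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;  // arm64 value; 4 KiB on arm.
    constexpr uint32_t kReferenceSizeShift = 2;               // 32-bit compressed references.

    static bool NeedsFixedTemp(uint32_t data_offset, uint32_t const_index) {
      // Constant-index array loads become field loads at a flat offset; a fixed
      // temp is only needed once that offset no longer fits a single LDR.
      uint32_t offset = data_offset + (const_index << kReferenceSizeShift);
      return offset >= kReferenceLoadMinFarOffset;
    }

    int main() {
      // With a 16-byte data offset, indices up to (16384 - 16) / 4 - 1 = 4091 fit.
      return NeedsFixedTemp(16u, 4091u) ? 1 : 0;  // Returns 0: no temp needed.
    }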
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 47d7360a5d..3e63c2674c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -85,15 +85,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
 // offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
 // the offset explicitly.
 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
 
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
 // Using a base helps identify when we hit Marking Register check breakpoints.
 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
@@ -788,50 +783,6 @@ class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
 };
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
-  ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
-                                 Location ref,
-                                 Location entrypoint = Location::NoLocation())
-      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
-    DCHECK(kEmitCompilerReadBarrier);
-  }
-
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(locations->CanCall());
-    DCHECK(ref_.IsRegister()) << ref_;
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-
-    __ Bind(GetEntryLabel());
-    GenerateReadBarrierMarkRuntimeCall(codegen);
-    __ B(GetExitLabel());
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
-};
-
 // Slow path loading `obj`'s lock word, loading a reference from
 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
 // marking `ref` if `obj` is gray according to the lock word (Baker
@@ -5964,16 +5915,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation()) {
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // loads we need a temporary only if the offset is too big.
-      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
-        locations->AddTemp(Location::RequiresRegister());
-      }
-    } else {
+    // We need a temporary register for the read barrier load in
+    // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+    // only if the offset is too big.
+    if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -6388,12 +6333,11 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation() &&
-        instruction->GetIndex()->IsConstant()) {
+    if (instruction->GetIndex()->IsConstant()) {
       // Array loads with constant index are treated as field loads.
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // constant index loads we need a temporary only if the offset is too big.
+      // We need a temporary register for the read barrier load in
+      // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+      // only if the offset is too big.
       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
       offset += index << DataType::SizeShift(DataType::Type::kReference);
@@ -6401,9 +6345,8 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
         locations->AddTemp(Location::RequiresRegister());
       }
     } else {
-      // If using introspection, we need a non-scratch temporary for the array data pointer.
-      // Otherwise, we need a temporary register for the read barrier marking slow
-      // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+      // We need a non-scratch temporary for the array data pointer in
+      // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
       locations->AddTemp(Location::RequiresRegister());
     }
   } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6533,7 +6476,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
     } else {
       Location temp = locations->GetTemp(0);
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+          out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     vixl32::Register out = OutputRegister(instruction);
@@ -8797,72 +8740,41 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
   if (kUseBakerReadBarrier) {
     // Fast path implementation of art::ReadBarrier::BarrierForRoot when
     // Baker's read barrier are used.
-    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
-      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-      // the Marking Register) to decide whether we need to enter
-      // the slow path to mark the GC root.
-      //
-      // We use shared thunks for the slow path; shared within the method
-      // for JIT, across methods for AOT. That thunk checks the reference
-      // and jumps to the entrypoint if needed.
-      //
-      //     lr = &return_address;
-      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-      //       goto gc_root_thunk<root_reg>(lr)
-      //     }
-      //   return_address:
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      temps.Exclude(ip);
-      bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
-      uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
-
-      vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
-      vixl32::Label return_address;
-      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-      __ cmp(mr, Operand(0));
-      // Currently the offset is always within range. If that changes,
-      // we shall have to split the load the same way as for fields.
-      DCHECK_LT(offset, kReferenceLoadMinFarOffset);
-      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-      __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
-      EmitBakerReadBarrierBne(custom_data);
-      __ Bind(&return_address);
-      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-                narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
-                       : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
-    } else {
-      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-      // the Marking Register) to decide whether we need to enter
-      // the slow path to mark the GC root.
-      //
-      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-      //       // Slow path.
-      //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //       root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
-      //     }
-
-      // Slow path marking the GC root `root`. The entrypoint will
-      // be loaded by the slow path code.
-      SlowPathCodeARMVIXL* slow_path =
-          new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
-      AddSlowPath(slow_path);
+    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+    // the Marking Register) to decide whether we need to enter
+    // the slow path to mark the GC root.
+    //
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the reference
+    // and jumps to the entrypoint if needed.
+    //
+    //     lr = &return_address;
+    //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+    //       goto gc_root_thunk<root_reg>(lr)
+    //     }
+    //   return_address:
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
-
-      __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
-    }
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    temps.Exclude(ip);
+    bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+    uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
+
+    vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(mr, Operand(0));
+    // Currently the offset is always within range. If that changes,
+    // we shall have to split the load the same way as for fields.
+    DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+    EmitBakerReadBarrierBne(custom_data);
+    __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
   } else {
     // GC root loaded through a slow path for read barriers other
     // than Baker's.
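On the 32-bit ARM side the sequence additionally chooses between a narrow (16-bit) and wide (32-bit) LDR, which is why two return-address offsets exist. A rough sketch of the conditions a helper like CanEmitNarrowLdr() has to check — this is an assumption based on the Thumb-2 T1 LDR (immediate) encoding, not ART's actual implementation:

    #include <cstdint>

    // T1 LDR (immediate) only reaches low registers and a small
    // word-aligned offset (imm5 scaled by 4, i.e. 0..124 bytes).
    static bool CanEmitNarrowLdrSketch(uint32_t rt, uint32_t rn, uint32_t offset) {
      bool low_registers = rt < 8u && rn < 8u;                        // r0-r7 only.
      bool encodable_imm = (offset % 4u) == 0u && offset < 32u * 4u;  // imm5 << 2.
      return low_registers && encodable_imm;
    }

    int main() {
      return CanEmitNarrowLdrSketch(0, 1, 64) ? 0 : 1;  // Narrow LDR is possible here.
    }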
@@ -8890,86 +8802,76 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
-    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
-    // Marking Register) to decide whether we need to enter the slow
-    // path to mark the reference. Then, in the slow path, check the
-    // gray bit in the lock word of the reference's holder (`obj`) to
-    // decide whether to mark `ref` or not.
-    //
-    // We use shared thunks for the slow path; shared within the method
-    // for JIT, across methods for AOT. That thunk checks the holder
-    // and jumps to the entrypoint if needed. If the holder is not gray,
-    // it creates a fake dependency and returns to the LDR instruction.
-    //
-    //     lr = &gray_return_address;
-    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-    //       goto field_thunk<holder_reg, base_reg>(lr)
-    //     }
-    //   not_gray_return_address:
-    //     // Original reference load. If the offset is too large to fit
-    //     // into LDR, we use an adjusted base register here.
-    //     HeapReference<mirror::Object> reference = *(obj+offset);
-    //   gray_return_address:
-
-    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
-    vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
-    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
-    vixl32::Register base = obj;
-    if (offset >= kReferenceLoadMinFarOffset) {
-      base = RegisterFrom(temp);
-      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
-      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
-      offset &= (kReferenceLoadMinFarOffset - 1u);
-      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
-      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
-      // increase the overall code size when taking the generated thunks into account.
-      DCHECK(!narrow);
-    }
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    temps.Exclude(ip);
-    uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to mark the reference. Then, in the slow path, check the
+  // gray bit in the lock word of the reference's holder (`obj`) to
+  // decide whether to mark `ref` or not.
+  //
+  // We use shared thunks for the slow path; shared within the method
+  // for JIT, across methods for AOT. That thunk checks the holder
+  // and jumps to the entrypoint if needed. If the holder is not gray,
+  // it creates a fake dependency and returns to the LDR instruction.
+  //
+  //     lr = &gray_return_address;
+  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+  //       goto field_thunk<holder_reg, base_reg>(lr)
+  //     }
+  //   not_gray_return_address:
+  //     // Original reference load. If the offset is too large to fit
+  //     // into LDR, we use an adjusted base register here.
+  //     HeapReference<mirror::Object> reference = *(obj+offset);
+  //   gray_return_address:
+
+  DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+  vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+  bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+  vixl32::Register base = obj;
+  if (offset >= kReferenceLoadMinFarOffset) {
+    base = RegisterFrom(temp);
+    static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+    __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+    offset &= (kReferenceLoadMinFarOffset - 1u);
+    // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+    // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+    // increase the overall code size when taking the generated thunks into account.
+    DCHECK(!narrow);
+  }
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  temps.Exclude(ip);
+  uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
 
-    {
-      vixl::EmissionCheckScope guard(
-          GetVIXLAssembler(),
-          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-      vixl32::Label return_address;
-      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-      __ cmp(mr, Operand(0));
-      EmitBakerReadBarrierBne(custom_data);
-      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-      __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
-      if (needs_null_check) {
-        MaybeRecordImplicitNullCheck(instruction);
-      }
-      // Note: We need a specific width for the unpoisoning NEG.
-      if (kPoisonHeapReferences) {
-        if (narrow) {
-          // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
-          __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
-        } else {
-          __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
-        }
+  {
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(mr, Operand(0));
+    EmitBakerReadBarrierBne(custom_data);
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
+    // Note: We need a specific width for the unpoisoning NEG.
+    if (kPoisonHeapReferences) {
+      if (narrow) {
+        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+      } else {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
       }
-      __ Bind(&return_address);
-      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-                narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     }
-    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
-    return;
+    __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
   }
-
-  // /* HeapReference<Object> */ ref = *(obj + offset)
-  Location no_index = Location::NoLocation();
-  ScaleFactor no_scale_factor = TIMES_1;
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
 }
 
-void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                                 Location ref,
+void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
                                                                  vixl32::Register obj,
                                                                  uint32_t data_offset,
                                                                  Location index,
@@ -8983,65 +8885,57 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   ScaleFactor scale_factor = TIMES_4;
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
-    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
-    // Marking Register) to decide whether we need to enter the slow
-    // path to mark the reference. Then, in the slow path, check the
-    // gray bit in the lock word of the reference's holder (`obj`) to
-    // decide whether to mark `ref` or not.
-    //
-    // We use shared thunks for the slow path; shared within the method
-    // for JIT, across methods for AOT. That thunk checks the holder
-    // and jumps to the entrypoint if needed. If the holder is not gray,
-    // it creates a fake dependency and returns to the LDR instruction.
-    //
-    //     lr = &gray_return_address;
-    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-    //       goto array_thunk<base_reg>(lr)
-    //     }
-    //   not_gray_return_address:
-    //     // Original reference load. If the offset is too large to fit
-    //     // into LDR, we use an adjusted base register here.
-    //     HeapReference<mirror::Object> reference = data[index];
-    //   gray_return_address:
-
-    DCHECK(index.IsValid());
-    vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
-    vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
-    vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to mark the reference. Then, in the slow path, check the
+  // gray bit in the lock word of the reference's holder (`obj`) to
+  // decide whether to mark `ref` or not.
+  //
+  // We use shared thunks for the slow path; shared within the method
+  // for JIT, across methods for AOT. That thunk checks the holder
+  // and jumps to the entrypoint if needed. If the holder is not gray,
+  // it creates a fake dependency and returns to the LDR instruction.
+  //
+  //     lr = &gray_return_address;
+  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+  //       goto array_thunk<base_reg>(lr)
+  //     }
+  //   not_gray_return_address:
+  //     // Original reference load. If the offset is too large to fit
+  //     // into LDR, we use an adjusted base register here.
+  //     HeapReference<mirror::Object> reference = data[index];
+  //   gray_return_address:
 
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    temps.Exclude(ip);
-    uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+  DCHECK(index.IsValid());
+  vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+  vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+  vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
 
-    __ Add(data_reg, obj, Operand(data_offset));
-    {
-      vixl::EmissionCheckScope guard(
-          GetVIXLAssembler(),
-          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-      vixl32::Label return_address;
-      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-      __ cmp(mr, Operand(0));
-      EmitBakerReadBarrierBne(custom_data);
-      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-      __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
-      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-      // Note: We need a Wide NEG for the unpoisoning.
-      if (kPoisonHeapReferences) {
-        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
-      }
-      __ Bind(&return_address);
-      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-                BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  temps.Exclude(ip);
+  uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+
+  __ Add(data_reg, obj, Operand(data_offset));
+  {
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(mr, Operand(0));
+    EmitBakerReadBarrierBne(custom_data);
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    // Note: We need a Wide NEG for the unpoisoning.
+    if (kPoisonHeapReferences) {
+      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
    }
-    MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
-    return;
+    __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
   }
-
-  // /* HeapReference<Object> */ ref =
-  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
 }
 
 void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index ef82f2e904..0106236b17 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -632,8 +632,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
-  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location ref,
+  void GenerateArrayLoadWithBakerReadBarrier(Location ref,
                                              vixl::aarch32::Register obj,
                                              uint32_t data_offset,
                                              Location index,
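For field offsets that do not fit the LDR immediate, GenerateFieldLoadWithBakerReadBarrier splits the offset around kReferenceLoadMinFarOffset, as seen in the arm_vixl.cc hunk above. A self-contained sketch of that split (constant value taken from the ARM side of this patch):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t KB = 1024;
    constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;  // ARM value; 16 KiB on arm64.

    struct Split { uint32_t base_add; uint32_t ldr_offset; };

    static Split SplitFarOffset(uint32_t offset) {
      Split s;
      s.base_add = offset & ~(kReferenceLoadMinFarOffset - 1u);   // ADD base, obj, #hi
      s.ldr_offset = offset & (kReferenceLoadMinFarOffset - 1u);  // LDR ref, [base, #lo]
      return s;
    }

    int main() {
      Split s = SplitFarOffset(0x1234);
      assert(s.base_add == 0x1000 && s.ldr_offset == 0x234);
      assert(s.base_add + s.ldr_offset == 0x1234);  // The split loses nothing.
      return 0;
    }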