ARM/ARM64: Clean up Baker RB introspection codegen.
Remove the guard flags and remove unused code.
Avoid unnecessary temporaries for JIT. This was missed in
https://android-review.googlesource.com/725705
Test: m test-art-host-gtest
Test: Pixel 2 XL boots.
Test: m test-art-target-gtest
Test: testrunner.py --target --optimizing --jit
Bug: 36141117
Change-Id: Ic1bdc640db3f18d7169b0e62644f190e65a98d38
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 260920c..723446b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -89,15 +89,10 @@
// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// temp that's not an R0 (to avoid an extra move) and Baker read barrier field
@@ -680,50 +675,6 @@
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
};
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
-};
-
// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
@@ -2324,17 +2275,16 @@
: LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- !field_info.IsVolatile()) {
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // non-volatile loads we need a temporary only if the offset is too big.
+ if (!field_info.IsVolatile()) {
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
locations->AddTemp(FixedTempLocation());
}
} else {
+ // Volatile fields need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -2798,14 +2748,11 @@
: LocationSummary::kNoCall);
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- instruction->GetIndex()->IsConstant()) {
+ if (instruction->GetIndex()->IsConstant()) {
// Array loads with constant index are treated as field loads.
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // constant index loads we need a temporary only if the offset is too big.
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
offset += index << DataType::SizeShift(DataType::Type::kReference);
@@ -2813,6 +2760,8 @@
locations->AddTemp(FixedTempLocation());
}
} else {
+ // We need a non-scratch temporary for the array data pointer in
+ // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier().
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -2868,7 +2817,7 @@
} else {
Register temp = WRegisterFrom(locations->GetTemp(0));
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
+ out, obj.W(), offset, index, temp, /* needs_null_check */ false);
}
} else {
// General case.
@@ -6276,74 +6225,39 @@
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // We use shared thunks for the slow path; shared within the method
- // for JIT, across methods for AOT. That thunk checks the reference
- // and jumps to the entrypoint if needed.
- //
- // lr = &return_address;
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto gc_root_thunk<root_reg>(lr)
- // }
- // return_address:
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- if (fixup_label != nullptr) {
- __ bind(fixup_label);
- }
- static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 instruction (8B) before the return address label.");
- __ ldr(root_reg, MemOperand(obj.X(), offset));
- EmitBakerReadBarrierCbnz(custom_data);
- __ bind(&return_address);
- } else {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
- // }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
- // Slow path marking the GC root `root`. The entrypoint will
- // be loaded by the slow path code.
- SlowPathCodeARM64* slow_path =
- new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
- AddSlowPath(slow_path);
-
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- if (fixup_label == nullptr) {
- __ Ldr(root_reg, MemOperand(obj, offset));
- } else {
- EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
- }
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- __ Cbnz(mr, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ if (fixup_label != nullptr) {
+ __ bind(fixup_label);
}
+ static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 instruction (8B) before the return address label.");
+ __ ldr(root_reg, MemOperand(obj.X(), offset));
+ EmitBakerReadBarrierCbnz(custom_data);
+ __ bind(&return_address);
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -6380,7 +6294,7 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire) {
+ if (!use_load_acquire) {
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
// Marking Register) to decide whether we need to enter the slow
// path to mark the reference. Then, in the slow path, check the
@@ -6457,8 +6371,7 @@
use_load_acquire);
}
-void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref,
Register obj,
uint32_t data_offset,
Location index,
@@ -6472,76 +6385,57 @@
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
- if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use shared thunks for the slow path; shared within the method
- // for JIT, across methods for AOT. That thunk checks the holder
- // and jumps to the entrypoint if needed. If the holder is not gray,
- // it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto array_thunk<base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = data[index];
- // gray_return_address:
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
+ //
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
- DCHECK(index.IsValid());
- Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
- Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ DCHECK(index.IsValid());
+ Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+ Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
- __ Add(temp.X(), obj.X(), Operand(data_offset));
- {
- ExactAssemblyScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- EmitBakerReadBarrierCbnz(custom_data);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
- // macro instructions disallowed in ExactAssemblyScope.
- if (kPoisonHeapReferences) {
- __ neg(ref_reg, Operand(ref_reg));
- }
- __ bind(&return_address);
+ __ Add(temp.X(), obj.X(), Operand(data_offset));
+ {
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ EmitBakerReadBarrierCbnz(custom_data);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+ // macro instructions disallowed in ExactAssemblyScope.
+ if (kPoisonHeapReferences) {
+ __ neg(ref_reg, Operand(ref_reg));
}
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
- return;
+ __ bind(&return_address);
}
-
- // Array cells are never volatile variables, therefore array loads
- // never use Load-Acquire instructions on ARM64.
- const bool use_load_acquire = false;
-
- // /* HeapReference<Object> */ ref =
- // *(obj + data_offset + index * sizeof(HeapReference<Object>))
- GenerateReferenceLoadWithBakerReadBarrier(instruction,
- ref,
- obj,
- data_offset,
- index,
- scale_factor,
- temp,
- needs_null_check,
- use_load_acquire);
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
}
void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c07d1ea..5aeb0b4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -672,8 +672,7 @@
bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
- void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
+ void GenerateArrayLoadWithBakerReadBarrier(Location ref,
vixl::aarch64::Register obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 47d7360..3e63c26 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -85,15 +85,10 @@
// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
@@ -788,50 +783,6 @@
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
};
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
-};
-
// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
@@ -5964,16 +5915,10 @@
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation()) {
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // loads we need a temporary only if the offset is too big.
- if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
- locations->AddTemp(Location::RequiresRegister());
- }
- } else {
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -6388,12 +6333,11 @@
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- instruction->GetIndex()->IsConstant()) {
+ if (instruction->GetIndex()->IsConstant()) {
// Array loads with constant index are treated as field loads.
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // constant index loads we need a temporary only if the offset is too big.
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
offset += index << DataType::SizeShift(DataType::Type::kReference);
@@ -6401,9 +6345,8 @@
locations->AddTemp(Location::RequiresRegister());
}
} else {
- // If using introspection, we need a non-scratch temporary for the array data pointer.
- // Otherwise, we need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+ // We need a non-scratch temporary for the array data pointer in
+ // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
locations->AddTemp(Location::RequiresRegister());
}
} else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6533,7 +6476,7 @@
} else {
Location temp = locations->GetTemp(0);
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
}
} else {
vixl32::Register out = OutputRegister(instruction);
@@ -8797,72 +8740,41 @@
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // We use shared thunks for the slow path; shared within the method
- // for JIT, across methods for AOT. That thunk checks the reference
- // and jumps to the entrypoint if needed.
- //
- // lr = &return_address;
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto gc_root_thunk<root_reg>(lr)
- // }
- // return_address:
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
- uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- // Currently the offset is always within range. If that changes,
- // we shall have to split the load the same way as for fields.
- DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
- EmitBakerReadBarrierBne(custom_data);
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
- : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
- } else {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
- // }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
- // Slow path marking the GC root `root`. The entrypoint will
- // be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
- AddSlowPath(slow_path);
-
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+ EmitBakerReadBarrierBne(custom_data);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -8890,86 +8802,76 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use shared thunks for the slow path; shared within the method
- // for JIT, across methods for AOT. That thunk checks the holder
- // and jumps to the entrypoint if needed. If the holder is not gray,
- // it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto field_thunk<holder_reg, base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = *(obj+offset);
- // gray_return_address:
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
+ //
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
- DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
- vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
- vixl32::Register base = obj;
- if (offset >= kReferenceLoadMinFarOffset) {
- base = RegisterFrom(temp);
- static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
- __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
- offset &= (kReferenceLoadMinFarOffset - 1u);
- // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
- // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
- // increase the overall code size when taking the generated thunks into account.
- DCHECK(!narrow);
- }
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
-
- {
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitBakerReadBarrierBne(custom_data);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- // Note: We need a specific width for the unpoisoning NEG.
- if (kPoisonHeapReferences) {
- if (narrow) {
- // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
- __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
- } else {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
- }
- }
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
- : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
- }
- MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
- return;
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
- // /* HeapReference<Object> */ ref = *(obj + offset)
- Location no_index = Location::NoLocation();
- ScaleFactor no_scale_factor = TIMES_1;
- GenerateReferenceLoadWithBakerReadBarrier(
- instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
+ {
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitBakerReadBarrierBne(custom_data);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a specific width for the unpoisoning NEG.
+ if (kPoisonHeapReferences) {
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ }
+ MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
}
-void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
+void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
vixl32::Register obj,
uint32_t data_offset,
Location index,
@@ -8983,65 +8885,57 @@
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
ScaleFactor scale_factor = TIMES_4;
- if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use shared thunks for the slow path; shared within the method
- // for JIT, across methods for AOT. That thunk checks the holder
- // and jumps to the entrypoint if needed. If the holder is not gray,
- // it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto array_thunk<base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = data[index];
- // gray_return_address:
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
+ //
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
- DCHECK(index.IsValid());
- vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
- vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+ vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
- __ Add(data_reg, obj, Operand(data_offset));
- {
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitBakerReadBarrierBne(custom_data);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- // Note: We need a Wide NEG for the unpoisoning.
- if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
- }
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Add(data_reg, obj, Operand(data_offset));
+ {
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitBakerReadBarrierBne(custom_data);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
- MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
- return;
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
}
-
- // /* HeapReference<Object> */ ref =
- // *(obj + data_offset + index * sizeof(HeapReference<Object>))
- GenerateReferenceLoadWithBakerReadBarrier(
- instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
+ MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
}
void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index ef82f2e..0106236 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -632,8 +632,7 @@
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
- void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
+ void GenerateArrayLoadWithBakerReadBarrier(Location ref,
vixl::aarch32::Register obj,
uint32_t data_offset,
Location index,