ARM/ARM64: Introspection Baker RB for intrinsics.
Namely Unsafe.getObject/-Volatile().
Test: Additional tests in 160-read-barrier-stress.
Test: m test-art-host-gtest
Test: Pixel 2 XL boots.
Test: testrunner.py --target --optimizing
Test: ART_HEAP_POISONING=true testrunner.py --target --optimizing
Bug: 36141117
Change-Id: I7305d75ab0ae8c9621843f9a382ad3a5e0aefa0b
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7aaa7bf..415c6bf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,16 +93,6 @@
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
-// Some instructions have special requirements for a temporary, for example
-// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
-// temp that's not an R0 (to avoid an extra move) and Baker read barrier field
-// loads with large offsets need a fixed register to limit the number of link-time
-// thunks we generate. For these and similar cases, we want to reserve a specific
-// register that's neither callee-save nor an argument register. We choose x15.
-inline Location FixedTempLocation() {
- return Location::RegisterLocation(x15.GetCode());
-}
-
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
case kCondEQ: return eq;
@@ -678,170 +668,9 @@
// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- use_load_acquire_(use_load_acquire),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(obj_.IsW());
- DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- // Temporary register `temp_`, used to store the lock word, must
- // not be IP0 nor IP1, as we may use them to emit the reference
- // load (in the call to GenerateRawReferenceLoad below), and we
- // need the lock word to still be in `temp_` after the reference
- // load.
- DCHECK_NE(LocationFrom(temp_).reg(), IP0);
- DCHECK_NE(LocationFrom(temp_).reg(), IP1);
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->GenerateRawReferenceLoad(instruction_,
- ref_,
- obj_,
- offset_,
- index_,
- scale_factor_,
- /* needs_null_check */ false,
- use_load_acquire_);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- size_t scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // Should this reference load use Load-Acquire semantics?
- bool use_load_acquire_;
- // A temporary register used to hold the lock word of `obj_`.
- Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
-// tries to update `obj.field`).
+// after marking.
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
@@ -914,7 +743,7 @@
__ Bind(GetEntryLabel());
- // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
+ // The implementation is:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -6288,9 +6117,8 @@
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- Register obj,
- uint32_t offset,
- Location maybe_temp,
+ vixl::aarch64::Register obj,
+ const vixl::aarch64::MemOperand& src,
bool needs_null_check,
bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
@@ -6317,6 +6145,53 @@
// HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
+ DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
+ DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = use_load_acquire
+ ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
+ : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
+
+ {
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ EmitBakerReadBarrierCbnz(custom_data);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ if (use_load_acquire) {
+ DCHECK_EQ(src.GetOffset(), 0);
+ __ ldar(ref_reg, src);
+ } else {
+ __ ldr(ref_reg, src);
+ }
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+ // macro instructions disallowed in ExactAssemblyScope.
+ if (kPoisonHeapReferences) {
+ __ neg(ref_reg, Operand(ref_reg));
+ }
+ __ bind(&return_address);
+ }
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
+}
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location maybe_temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
Register base = obj;
if (use_load_acquire) {
@@ -6331,41 +6206,9 @@
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
}
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data = use_load_acquire
- ? EncodeBakerReadBarrierAcquireData(base.GetCode(), obj.GetCode())
- : EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode());
-
- {
- ExactAssemblyScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- EmitBakerReadBarrierCbnz(custom_data);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- if (use_load_acquire) {
- DCHECK_EQ(offset, 0u);
- __ ldar(ref_reg, MemOperand(base.X()));
- } else {
- __ ldr(ref_reg, MemOperand(base.X(), offset));
- }
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
- // macro instructions disallowed in ExactAssemblyScope.
- if (kPoisonHeapReferences) {
- __ neg(ref_reg, Operand(ref_reg));
- }
- __ bind(&return_address);
- }
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
+ MemOperand src(base.X(), offset);
+ GenerateFieldLoadWithBakerReadBarrier(
+ instruction, ref, obj, src, needs_null_check, use_load_acquire);
}
void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref,
@@ -6435,65 +6278,6 @@
MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
}
-void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- Register temp,
- bool needs_null_check,
- bool use_load_acquire) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- // If we are emitting an array load, we should not be using a
- // Load Acquire instruction. In other words:
- // `instruction->IsArrayGet()` => `!use_load_acquire`.
- DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
-
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // }
-
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will be loaded by the slow path code.
- SlowPathCodeARM64* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp);
- AddSlowPath(slow_path);
-
- __ Cbnz(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: just load the reference.
- GenerateRawReferenceLoad(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
-}
-
void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 6a358a4..f3f5079 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -92,6 +92,16 @@
((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
vixl::aarch64::lr);
+// Some instructions have special requirements for a temporary, for example
+// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
+// temp that's not an R0 (to avoid an extra move) and Baker read barrier field
+// loads with large offsets need a fixed register to limit the number of link-time
+// thunks we generate. For these and similar cases, we want to reserve a specific
+// register that's neither callee-save nor an argument register. We choose x15.
+inline Location FixedTempLocation() {
+ return Location::RegisterLocation(vixl::aarch64::x15.GetCode());
+}
+
// Callee-save registers AAPCS64, without x19 (Thread Register) (nor
// x20 (Marking Register) when emitting Baker read barriers).
const vixl::aarch64::CPURegList callee_saved_core_registers(
@@ -663,6 +673,15 @@
ReadBarrierOption read_barrier_option);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
+ // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ const vixl::aarch64::MemOperand& src,
+ bool needs_null_check,
+ bool use_load_acquire);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -678,21 +697,6 @@
Location index,
vixl::aarch64::Register temp,
bool needs_null_check);
- // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
- // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
- //
- // Load the object reference located at the address
- // `obj + offset + (index << scale_factor)`, held by object `obj`, into
- // `ref`, and mark it if needed.
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch64::Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- vixl::aarch64::Register temp,
- bool needs_null_check,
- bool use_load_acquire);
// Generate code checking whether the the reference field at the
// address `obj + field_offset`, held by object `obj`, needs to be
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 3e63c26..ce93afc 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -786,160 +786,9 @@
// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- // Temporary register `temp_`, used to store the lock word, must
- // not be IP, as we may use it to emit the reference load (in the
- // call to GenerateRawReferenceLoad below), and we need the lock
- // word to still be in `temp_` after the reference load.
- DCHECK(!temp_.Is(ip));
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`.
- vixl32::Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
-// tries to update `obj.field`).
+// after marking.
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
@@ -1006,7 +855,7 @@
__ Bind(GetEntryLabel());
- // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
+ // The implementation is:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -8796,8 +8645,7 @@
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
- uint32_t offset,
- Location temp,
+ const vixl32::MemOperand& src,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8823,23 +8671,15 @@
// HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
- DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ DCHECK(src.GetAddrMode() == vixl32::Offset);
+ DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
- vixl32::Register base = obj;
- if (offset >= kReferenceLoadMinFarOffset) {
- base = RegisterFrom(temp);
- static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
- __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
- offset &= (kReferenceLoadMinFarOffset - 1u);
- // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
- // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
- // increase the overall code size when taking the generated thunks into account.
- DCHECK(!narrow);
- }
+ bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
+
UseScratchRegisterScope temps(GetVIXLAssembler());
temps.Exclude(ip);
- uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
+ uint32_t custom_data =
+ EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
{
vixl::EmissionCheckScope guard(
@@ -8850,7 +8690,7 @@
__ cmp(mr, Operand(0));
EmitBakerReadBarrierBne(custom_data);
ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
@@ -8871,6 +8711,24 @@
MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
}
+void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ GenerateFieldLoadWithBakerReadBarrier(
+ instruction, ref, obj, MemOperand(base, offset), needs_null_check);
+}
+
void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
vixl32::Register obj,
uint32_t data_offset,
@@ -8938,53 +8796,6 @@
MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
}
-void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- Location temp,
- bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
-
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // }
-
- vixl32::Register temp_reg = RegisterFrom(temp);
-
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg);
- AddSlowPath(slow_path);
-
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ 22);
-}
-
void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 0106236..2fd18ca 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -624,6 +624,14 @@
ReadBarrierOption read_barrier_option);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
+ // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ const vixl::aarch32::MemOperand& src,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch32::Register obj,
@@ -638,20 +646,6 @@
Location index,
Location temp,
bool needs_null_check);
- // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
- // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
- //
- // Load the object reference located at the address
- // `obj + offset + (index << scale_factor)`, held by object `obj`, into
- // `ref`, and mark it if needed.
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- Location temp,
- bool needs_null_check);
// Generate code checking whether the the reference field at the
// address `obj + field_offset`, held by object `obj`, needs to be
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4b2bcc8..e95cc6e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -745,15 +745,15 @@
if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
Register temp = WRegisterFrom(locations->GetTemp(0));
- codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
- trg_loc,
- base,
- /* offset */ 0u,
- /* index */ offset_loc,
- /* scale_factor */ 0u,
- temp,
- /* needs_null_check */ false,
- is_volatile);
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ // Piggy-back on the field load path using introspection for the Baker read barrier.
+ __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits.
+ codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ MemOperand(temp.X()),
+ /* needs_null_check */ false,
+ is_volatile);
} else {
// Other cases.
MemOperand mem_op(base.X(), offset);
@@ -782,9 +782,9 @@
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ // We need a temporary register for the read barrier load in order to use
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
+ locations->AddTemp(FixedTempLocation());
}
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 2963308..b920750 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -638,8 +638,11 @@
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
Location temp = locations->GetTemp(0);
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+ // Piggy-back on the field load path using introspection for the Baker read barrier.
+ __ Add(RegisterFrom(temp), base, Operand(offset));
+ MemOperand src(RegisterFrom(temp), 0);
+ codegen->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, trg_loc, base, src, /* needs_null_check */ false);
if (is_volatile) {
__ Dmb(vixl32::ISH);
}