Revert "Use the holder's gray bit in Baker read barrier slow paths (ARM, ARM64)."
This reverts commit 27b1f9cbfc1409418eee4b0e22f29f033e10b64d.
This change (along with https://android-review.googlesource.com/#/c/342428/)
creates null pointer dereferences.
Bug: 35780827
Bug: 29516974
Change-Id: If731960a405f9b89528f3daaf235da57cabc5c11
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2560c9f..710ca7a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -636,24 +636,160 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM {
- protected:
- ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint)
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
+ public:
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
: SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; }
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+ // The read barrier instrumentation of object ArrayGet
+ // instructions does not support the HIntermediateAddress
+ // instruction.
+ DCHECK(!(instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ blx(entrypoint_.AsRegister<Register>());
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ __ b(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp1,
+ Register temp2,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeARM(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1),
+ temp2_(temp2),
+ entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK_NE(temp1_, IP);
+ __ Mov(temp1_, ref_reg);
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
@@ -690,325 +826,18 @@
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
- }
-
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if it is already loaded.
- const Location entrypoint_;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM);
-};
-
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
- public:
- ReadBarrierMarkSlowPathARM(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- if (kIsDebugBuild) {
- Register ref_reg = ref_.AsRegister<Register>();
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- }
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ b(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- Register temp,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARM";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- Register ref_reg = ref_.AsRegister<Register>();
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- DCHECK_NE(ref_reg, temp_);
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
- __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ b(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`.
- Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never
-// tries to update `obj.field`).
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
- : public ReadBarrierMarkSlowPathBaseARM {
- public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- Register temp1,
- Register temp2,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp1_(temp1),
- temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- Register ref_reg = ref_.AsRegister<Register>();
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- DCHECK_NE(ref_reg, temp1_);
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
- DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking and field updating slow path: "
- << instruction_->DebugName();
- DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset = index_;
- DCHECK(field_offset.IsRegisterPair()) << field_offset;
-
- __ Bind(GetEntryLabel());
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp1`.
- __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
- __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
-
- // Save the old value of the reference before marking it.
- // Note that we cannot use IP to save the old reference, as IP is
- // used internally by the ReadBarrierMarkRegX entry point, and we
- // need the old reference after the call to that entry point.
- DCHECK_NE(temp1_, IP);
- __ Mov(temp1_, ref_reg);
-
- GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
+ // update the field in the holder (`*(obj_ + field_offset_)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
+ Label done;
__ cmp(temp1_, ShifterOperand(ref_reg));
- __ b(GetExitLabel(), EQ);
+ __ b(&done, EQ);
// Update the the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
@@ -1021,7 +850,7 @@
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- Register offset = field_offset.AsRegisterPairLow<Register>();
+ Register offset = field_offset_.AsRegisterPairLow<Register>();
Register expected = temp1_;
Register value = ref_reg;
Register tmp_ptr = IP; // Pointer to actual memory.
@@ -1071,27 +900,25 @@
}
}
+ __ Bind(&done);
__ b(GetExitLabel());
}
private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
const Register temp1_;
- // A temporary register used in the implementation of the CAS, to
- // update the object's reference field.
const Register temp2_;
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM);
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
};
// Slow path generating a read barrier for a heap reference.
@@ -7483,11 +7310,13 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to mark the reference.
- // Then, in the slow path, check the gray bit in the lock word of
- // the reference's holder (`obj`) to decide whether to mark `ref` or
- // not.
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
@@ -7495,19 +7324,14 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// }
+ // TODO: This temp register is only necessary when
+ // `always_update_field` is true; make it optional (like `temp2`).
Register temp_reg = temp.AsRegister<Register>();
// Slow path marking the object `ref` when the GC is marking. The
@@ -7516,37 +7340,18 @@
SlowPathCodeARM* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
- // supports address of the form `obj + field_offset`, where `obj`
- // is a register and `field_offset` is a register pair (of which
- // only the lower half is used). Thus `offset` and `scale_factor`
- // above are expected to be null in this code path.
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
} else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, ref, /* entrypoint */ temp3);
}
AddSlowPath(slow_path);
@@ -7556,11 +7361,11 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
+ // The reference load.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b6c97c..5bdaac2 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -633,23 +633,58 @@
}
}
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
- protected:
- ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
- : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeARM64(instruction),
+ ref_(ref),
+ entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+ // The read barrier instrumentation of object ArrayGet
+ // instructions does not support the HIntermediateAddress
+ // instruction.
+ DCHECK(!(instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+ __ Bind(GetEntryLabel());
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -685,253 +720,52 @@
// This runtime call does not require a stack map.
arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
+ __ B(GetExitLabel());
}
+ private:
// The location (register) of the marked object reference.
const Location ref_;
// The location of the entrypoint if it is already loaded.
const Location entrypoint_;
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
-};
-
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- use_load_acquire_(use_load_acquire),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(obj_.IsW());
- DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->GenerateRawReferenceLoad(instruction_,
- ref_,
- obj_,
- offset_,
- index_,
- scale_factor_,
- /* needs_null_check */ false,
- use_load_acquire_);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- size_t scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // Should this reference load use Load-Acquire semantics?
- bool use_load_acquire_;
- // A temporary register used to hold the lock word of `obj_`.
- Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
-// tries to update `obj.field`).
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM64 above, which never tries to update
+// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
- : public ReadBarrierMarkSlowPathBaseARM64 {
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
+ ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeARM64(instruction),
+ ref_(ref),
obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- use_load_acquire_(use_load_acquire),
- temp_(temp) {
+ field_offset_(field_offset),
+ temp_(temp),
+ entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
}
const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
+ return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -940,82 +774,70 @@
DCHECK(locations->CanCall());
DCHECK(ref_.IsRegister()) << ref_;
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(obj_.IsW());
- DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
+ // This slow path is only used by the UnsafeCASObject intrinsic.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, 0u);
- DCHECK_EQ(use_load_acquire_, false);
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset = index_;
- DCHECK(field_offset.IsRegister()) << field_offset;
+ DCHECK(field_offset_.IsRegister()) << field_offset_;
__ Bind(GetEntryLabel());
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->GenerateRawReferenceLoad(instruction_,
- ref_,
- obj_,
- offset_,
- index_,
- scale_factor_,
- /* needs_null_check */ false,
- use_load_acquire_);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
-
- // Save the old value of the reference before marking it.
+ // Save the old reference.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(LocationFrom(temp_).reg(), IP0);
__ Mov(temp_.W(), ref_reg);
- GenerateReadBarrierMarkRuntimeCall(codegen);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- ref
+ // W0 <- ReadBarrierMark(W0)
+ // ref <- W0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blr(XRegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
+ // update the field in the holder (`*(obj_ + field_offset_)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDXR/CMP/BNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
+ vixl::aarch64::Label done;
__ Cmp(temp_.W(), ref_reg);
- __ B(eq, GetExitLabel());
+ __ B(eq, &done);
// Update the the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
@@ -1028,7 +850,7 @@
// Convenience aliases.
Register base = obj_.W();
- Register offset = XRegisterFrom(field_offset);
+ Register offset = XRegisterFrom(field_offset_);
Register expected = temp_.W();
Register value = ref_reg;
Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
@@ -1072,26 +894,24 @@
}
}
+ __ Bind(&done);
__ B(GetExitLabel());
}
private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- size_t scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // Should this reference load use Load-Acquire semantics?
- bool use_load_acquire_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
const Register temp_;
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
};
// Slow path generating a read barrier for a heap reference.
@@ -5953,11 +5773,13 @@
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to mark the reference.
- // Then, in the slow path, check the gray bit in the lock word of
- // the reference's holder (`obj`) to decide whether to mark `ref` or
- // not.
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp2` the read barrier mark entry point
@@ -5965,17 +5787,10 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// }
// Slow path marking the object `ref` when the GC is marking. The
@@ -5984,38 +5799,17 @@
Location temp2_loc = LocationFrom(temp2);
SlowPathCodeARM64* slow_path;
if (always_update_field) {
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register. Thus
- // `offset` and `scale_factor` above are expected to be null in
- // this code path.
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
+ // address of the form `obj + field_offset`, where `obj` is a
+ // register and `field_offset` is a register. Thus `offset` and
+ // `scale_factor` above are expected to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
+ instruction, ref, obj, /* field_offset */ index, temp, /* entrypoint */ temp2_loc);
} else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, ref, /* entrypoint */ temp2_loc);
}
AddSlowPath(slow_path);
@@ -6025,12 +5819,12 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ Ldr(temp2, MemOperand(tr, entry_point_offset));
+ // The reference load.
+ GenerateRawReferenceLoad(
+ instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ Cbnz(temp2, slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 0239ac9..c92a056 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -657,24 +657,162 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
- protected:
- ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
: SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
vixl32::Register ref_reg = RegisterFrom(ref_);
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+ // The read barrier instrumentation of object ArrayGet
+ // instructions does not support the HIntermediateAddress
+ // instruction.
+ DCHECK(!(instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ DCHECK(!ref_reg.Is(sp));
+ DCHECK(!ref_reg.Is(lr));
+ DCHECK(!ref_reg.Is(pc));
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK(!ref_reg.Is(ip));
+ DCHECK(ref_reg.IsRegister()) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blx(RegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ Location field_offset,
+ vixl32::Register temp1,
+ vixl32::Register temp2,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeARMVIXL(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1),
+ temp2_(temp2),
+ entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ vixl32::Register ref_reg = RegisterFrom(ref_);
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK(!temp1_.Is(ip));
+ __ Mov(temp1_, ref_reg);
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
@@ -711,324 +849,18 @@
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
- }
-
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
-};
-
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`.
- vixl32::Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
-// tries to update `obj.field`).
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint)
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp1_(temp1),
- temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
- DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking and field updating slow path: "
- << instruction_->DebugName();
- DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
- Location field_offset = index_;
- DCHECK(field_offset.IsRegisterPair()) << field_offset;
-
- __ Bind(GetEntryLabel());
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
-
- // Save the old value of the reference before marking it.
- // Note that we cannot use IP to save the old reference, as IP is
- // used internally by the ReadBarrierMarkRegX entry point, and we
- // need the old reference after the call to that entry point.
- DCHECK(!temp1_.Is(ip));
- __ Mov(temp1_, ref_reg);
-
- GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
+ // update the field in the holder (`*(obj_ + field_offset_)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
+ vixl32::Label done;
__ Cmp(temp1_, ref_reg);
- __ B(eq, GetExitLabel());
+ __ B(eq, &done, /* far_target */ false);
// Update the the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
@@ -1042,7 +874,7 @@
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- vixl32::Register offset = LowRegisterFrom(field_offset);
+ vixl32::Register offset = LowRegisterFrom(field_offset_);
vixl32::Register expected = temp1_;
vixl32::Register value = ref_reg;
vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory.
@@ -1098,27 +930,25 @@
}
}
+ __ Bind(&done);
__ B(GetExitLabel());
}
private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
// The register containing the object holding the marked object reference field.
const vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
const vixl32::Register temp1_;
- // A temporary register used in the implementation of the CAS, to
- // update the object's reference field.
const vixl32::Register temp2_;
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
};
// Slow path generating a read barrier for a heap reference.
@@ -7562,11 +7392,13 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to mark the reference.
- // Then, in the slow path, check the gray bit in the lock word of
- // the reference's holder (`obj`) to decide whether to mark `ref` or
- // not.
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
@@ -7574,19 +7406,14 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// }
+ // TODO: This temp register is only necessary when
+ // `always_update_field` is true; make it optional (like `temp2`).
vixl32::Register temp_reg = RegisterFrom(temp);
// Slow path marking the object `ref` when the GC is marking. The
@@ -7595,37 +7422,18 @@
SlowPathCodeARMVIXL* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register pair (of
- // which only the lower half is used). Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
+ // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
} else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, ref, /* entrypoint */ temp3);
}
AddSlowPath(slow_path);
@@ -7635,11 +7443,11 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
+ // The reference load.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
}