Use the holder's gray bit in Baker read barrier slow paths (ARM, ARM64).
In compiler-generated code, when deciding whether to mark a
heap reference in a read barrier, after checking whether the
GC is currently marking, also check (in the slow path)
whether the reference's holder is gray according to its lock
word, before actually marking the reference.
This change is only for ARM and ARM64, as it does not
benefit x86 or x86-64.
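
In pseudocode, the new fast path/slow path logic is roughly the
following (a sketch condensed from the comments added in the hunks
below; `temp` stands for the register holding the pre-loaded mark
entrypoint, e.g. `temp3` on ARM):

  temp = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
  if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
    // Slow path.
    uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
    lfence;  // Load fence or artificial data dependency.
    HeapReference<mirror::Object> ref = *src;  // Original reference load.
    if (rb_state == ReadBarrier::GrayState()) {
      ref = temp(ref);  // ref = ReadBarrier::Mark(ref);
    }
  } else {
    HeapReference<mirror::Object> ref = *src;  // Original reference load.
  }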
Test: Run ART tests in Baker read barrier configuration.
Test: Boot a device in Baker read barrier configuration.
Bug: 29516974
Change-Id: I60595a8f4987747faeaa359ad873e9758c1ded75
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 710ca7a..2560c9f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -636,10 +636,75 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM {
+ protected:
+ ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ Register ref_reg = ref_.AsRegister<Register>();
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary; it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ blx(entrypoint_.AsRegister<Register>());
+ } else {
+ // Entrypoint is not already loaded; load it from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -651,12 +716,12 @@
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
+class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
public:
ReadBarrierMarkSlowPathARM(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -664,15 +729,77 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ if (kIsDebugBuild) {
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ }
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARM";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK_NE(ref_reg, temp_);
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -686,158 +813,202 @@
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
//
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ blx(entrypoint_.AsRegister<Register>());
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
}
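+ // (Loading `obj_->monitor_` is the first access to `obj_` on this path, so
+ // recording the PC of that load lets a fault on a null `obj_` serve as the
+ // implicit null check for the whole sequence.)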
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which prevents load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32));
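+ // (On ARM, a shifter-operand LSR #32 yields 0, so this ADD adds zero:
+ // `obj_` keeps its value but now carries a data dependency on `temp_`.)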
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`.
+ Register temp_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+ : public ReadBarrierMarkSlowPathBaseARM {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp1,
- Register temp2,
- Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp1,
+ Register temp2,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
temp1_(temp1),
- temp2_(temp2),
- entrypoint_(entrypoint) {
+ temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM";
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK_NE(ref_reg, temp1_);
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegisterPair()) << field_offset;
__ Bind(GetEntryLabel());
- // Save the old reference.
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which prevents load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp1`.
+ __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32));
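+ // (Same zero-add dependency trick as above: LSR #32 yields 0, so `obj_`
+ // is unchanged but now depends on `temp1_`.)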
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(temp1_, IP);
__ Mov(temp1_, ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ blx(entrypoint_.AsRegister<Register>());
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- Label done;
__ cmp(temp1_, ShifterOperand(ref_reg));
- __ b(&done, EQ);
+ __ b(GetExitLabel(), EQ);
// Update the holder's field atomically. This may fail if the
// mutator updates it before us, but it's OK. This is achieved
@@ -850,7 +1021,7 @@
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register offset = field_offset.AsRegisterPairLow<Register>();
Register expected = temp1_;
Register value = ref_reg;
Register tmp_ptr = IP; // Pointer to actual memory.
@@ -900,25 +1071,27 @@
}
}
- __ Bind(&done);
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const Register temp1_;
+ // A temporary register used in the implementation of the CAS, to
+ // update the object's reference field.
const Register temp2_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM);
};
// Slow path generating a read barrier for a heap reference.
@@ -7310,13 +7483,11 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // After loading the reference from `obj.field` into `ref`, query
- // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
- // need to enter the slow path to mark the reference. This
- // optimistic strategy (we expect the GC to not be marking most of
- // the time) does not check `obj`'s lock word (to see if it is a
- // gray object or not), so may sometimes mark an already marked
- // object.
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
@@ -7324,14 +7495,19 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
- // TODO: This temp register is only necessary when
- // `always_update_field` is true; make it optional (like `temp2`).
Register temp_reg = temp.AsRegister<Register>();
// Slow path marking the object `ref` when the GC is marking. The
@@ -7340,18 +7516,37 @@
SlowPathCodeARM* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
+ // supports address of the form `obj + field_offset`, where `obj`
+ // is a register and `field_offset` is a register pair (of which
+ // only the lower half is used). Thus `offset` and `scale_factor`
+ // above are expected to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ *temp2,
+ /* entrypoint */ temp3);
} else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction, ref, /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp3);
}
AddSlowPath(slow_path);
@@ -7361,11 +7556,11 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
- // The reference load.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
// The entrypoint is null when the GC is not marking; this saves one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5bdaac2..7b6c97c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -633,10 +633,73 @@
}
}
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
+ protected:
+ ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary; it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- ref
+ // W0 <- ReadBarrierMark(W0)
+ // ref <- W0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blr(XRegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded; load it from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -648,14 +711,12 @@
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
public:
ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM64(instruction),
- ref_(ref),
- entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -666,12 +727,75 @@
DCHECK(locations->CanCall());
DCHECK(ref_.IsRegister()) << ref_;
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ use_load_acquire_(use_load_acquire),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(obj_.IsW());
+ DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -685,87 +809,129 @@
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(ref_.reg(), LR);
- DCHECK_NE(ref_.reg(), WSP);
- DCHECK_NE(ref_.reg(), WZR);
- // IP0 is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_.reg(), IP0);
- DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
- // "Compact" slow path, saving two moves.
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // Instead of using the standard runtime calling convention (input
- // and output in W0):
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
//
- // W0 <- ref
- // W0 <- ReadBarrierMark(W0)
- // ref <- W0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blr(XRegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
}
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
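+ // (`temp_` was loaded as a W register, so bits 63:32 of `temp_.X()` are
+ // zero and LSR #32 yields 0: the ADD adds zero while creating an address
+ // dependency on the lock word load.)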
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->GenerateRawReferenceLoad(instruction_,
+ ref_,
+ obj_,
+ offset_,
+ index_,
+ scale_factor_,
+ /* needs_null_check */ false,
+ use_load_acquire_);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
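+ // (TBZ branches to the exit label when the tested bit is zero, i.e. when
+ // the holder is white and no marking is needed.)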
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ size_t scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // Should this reference load use Load-Acquire semantics?
+ bool use_load_acquire_;
+ // A temporary register used to hold the lock word of `obj_`.
+ Register temp_;
- // The location of the entrypoint if it is already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM64 above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+ : public ReadBarrierMarkSlowPathBaseARM64 {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp,
- Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM64(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
- temp_(temp),
- entrypoint_(entrypoint) {
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ use_load_acquire_(use_load_acquire),
+ temp_(temp) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
const char* GetDescription() const OVERRIDE {
- return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -774,70 +940,82 @@
DCHECK(locations->CanCall());
DCHECK(ref_.IsRegister()) << ref_;
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK(obj_.IsW());
+ DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegister()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, 0u);
+ DCHECK_EQ(use_load_acquire_, false);
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegister()) << field_offset;
__ Bind(GetEntryLabel());
- // Save the old reference.
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
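+ // (Same zero-add dependency trick as in
+ // LoadReferenceWithBakerReadBarrierSlowPathARM64 above.)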
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->GenerateRawReferenceLoad(instruction_,
+ ref_,
+ obj_,
+ offset_,
+ index_,
+ scale_factor_,
+ /* needs_null_check */ false,
+ use_load_acquire_);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(LocationFrom(temp_).reg(), IP0);
__ Mov(temp_.W(), ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(ref_.reg(), LR);
- DCHECK_NE(ref_.reg(), WSP);
- DCHECK_NE(ref_.reg(), WZR);
- // IP0 is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_.reg(), IP0);
- DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in W0):
- //
- // W0 <- ref
- // W0 <- ReadBarrierMark(W0)
- // ref <- W0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blr(XRegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDXR/CMP/BNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- vixl::aarch64::Label done;
__ Cmp(temp_.W(), ref_reg);
- __ B(eq, &done);
+ __ B(eq, GetExitLabel());
// Update the holder's field atomically. This may fail if the
// mutator updates it before us, but it's OK. This is achieved
@@ -850,7 +1028,7 @@
// Convenience aliases.
Register base = obj_.W();
- Register offset = XRegisterFrom(field_offset_);
+ Register offset = XRegisterFrom(field_offset);
Register expected = temp_.W();
Register value = ref_reg;
Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
@@ -894,24 +1072,26 @@
}
}
- __ Bind(&done);
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ size_t scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // Should this reference load use Load-Acquire semantics?
+ bool use_load_acquire_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const Register temp_;
- // The location of the entrypoint if it is already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
};
// Slow path generating a read barrier for a heap reference.
@@ -5773,13 +5953,11 @@
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- // After loading the reference from `obj.field` into `ref`, query
- // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
- // need to enter the slow path to mark the reference. This
- // optimistic strategy (we expect the GC to not be marking most of
- // the time) does not check `obj`'s lock word (to see if it is a
- // gray object or not), so may sometimes mark an already marked
- // object.
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp2` the read barrier mark entry point
@@ -5787,10 +5965,17 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
// Slow path marking the object `ref` when the GC is marking. The
@@ -5799,17 +5984,38 @@
Location temp2_loc = LocationFrom(temp2);
SlowPathCodeARM64* slow_path;
if (always_update_field) {
- // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
- // address of the form `obj + field_offset`, where `obj` is a
- // register and `field_offset` is a register. Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+ // only supports address of the form `obj + field_offset`, where
+ // `obj` is a register and `field_offset` is a register. Thus
+ // `offset` and `scale_factor` above are expected to be null in
+ // this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
- instruction, ref, obj, /* field_offset */ index, temp, /* entrypoint */ temp2_loc);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
} else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
- instruction, ref, /* entrypoint */ temp2_loc);
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
}
AddSlowPath(slow_path);
@@ -5819,12 +6025,12 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ Ldr(temp2, MemOperand(tr, entry_point_offset));
- // The reference load.
- GenerateRawReferenceLoad(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
// The entrypoint is null when the GC is not marking; this saves one load compared to
// checking GetIsGcMarking.
__ Cbnz(temp2, slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(
+ instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index c92a056..0239ac9 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -657,10 +657,75 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
+ protected:
+ ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ vixl32::Register ref_reg = RegisterFrom(ref_);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ DCHECK(!ref_reg.Is(sp));
+ DCHECK(!ref_reg.Is(lr));
+ DCHECK(!ref_reg.Is(pc));
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary; it cannot be the entry point's input/output.
+ DCHECK(!ref_reg.Is(ip));
+ DCHECK(ref_reg.IsRegister()) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blx(RegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded; load it from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -671,13 +736,13 @@
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
-// when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -685,6 +750,67 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ vixl32::Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
vixl32::Register ref_reg = RegisterFrom(ref_);
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
@@ -692,8 +818,6 @@
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -707,89 +831,126 @@
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast-path-based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex, as it performs additional checks that we do
+ // not do here for performance reasons. (A standalone C++ rendition of
+ // this fast path/slow path sequence is sketched after this class.)
+
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
- DCHECK(!ref_reg.Is(sp));
- DCHECK(!ref_reg.Is(lr));
- DCHECK(!ref_reg.Is(pc));
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK(!ref_reg.Is(ip));
- DCHECK(ref_reg.IsRegister()) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blx(RegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
}
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+ // which can be a 16-bit instruction, unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+ __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ vixl32::Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`.
+ vixl32::Register temp_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
};
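As a cross-check of the fast path/slow path comment inside EmitNativeCode above, here is a minimal, self-contained C++ rendition of the sequence. The bit position is assumed (the real constant lives in art::LockWord), and the acquire load stands in for what the emitted code gets more cheaply from the artificial address dependency `ADD obj, obj, temp, LSR #32`, which adds zero to `obj` yet makes the reference load depend on the monitor load. The gray test is exactly what the `LSRS` + `B(cc)` pair computes: shifting right by kReadBarrierStateShift + 1 drops the state bit into the carry flag.

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;  // Assumed; see art::LockWord.

    struct Object {
      std::atomic<uint32_t> monitor_;  // Lock word, containing the RB state bit.
    };

    template <typename Ref>
    Ref* BakerSlowPath(Object* obj, std::atomic<Ref*>* field, Ref* (*mark)(Ref*)) {
      // Load the holder's lock word *before* the reference; this load-load
      // ordering is what the fake address dependency enforces.
      uint32_t lock_word = obj->monitor_.load(std::memory_order_acquire);
      Ref* ref = field->load(std::memory_order_relaxed);  // Original load.
      // Mark only when the holder is gray (state bit == 1; white == 0).
      if ((lock_word >> kReadBarrierStateShift) & 1u) {
        ref = mark(ref);
      }
      return ref;
    }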
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+ : public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- Location field_offset,
- vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint = Location::NoLocation())
- : SlowPathCodeARMVIXL(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ vixl32::Register temp1,
+ vixl32::Register temp2,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
temp1_(temp1),
- temp2_(temp2),
- entrypoint_(entrypoint) {
+ temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
const char* GetDescription() const OVERRIDE {
- return "ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL";
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -797,70 +958,77 @@
vixl32::Register ref_reg = RegisterFrom(ref_);
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegisterPair()) << field_offset;
__ Bind(GetEntryLabel());
- // Save the old reference.
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp1`.
+ __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+ // which can be a 16-bit instruction, unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+ __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK(!temp1_.Is(ip));
__ Mov(temp1_, ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
- DCHECK(!ref_reg.Is(sp));
- DCHECK(!ref_reg.Is(lr));
- DCHECK(!ref_reg.Is(pc));
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK(!ref_reg.Is(ip));
- DCHECK(ref_reg.IsRegister()) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blx(RegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- vixl32::Label done;
__ Cmp(temp1_, ref_reg);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, GetExitLabel());
// Update the holder's field atomically. This may fail if the
// mutator updates it before us, but that's OK. This is achieved
@@ -874,7 +1042,7 @@
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- vixl32::Register offset = LowRegisterFrom(field_offset_);
+ vixl32::Register offset = LowRegisterFrom(field_offset);
vixl32::Register expected = temp1_;
vixl32::Register value = ref_reg;
vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory.
@@ -930,25 +1098,27 @@
}
}
- __ Bind(&done);
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const vixl32::Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`, and
+ // also to hold the original reference value when the reference is
+ // marked.
const vixl32::Register temp1_;
+ // A temporary register used in the implementation of the CAS, to
+ // update the object's reference field.
const vixl32::Register temp2_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
};
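The exclusive-monitor sequence emitted at the end of this slow path behaves like a single C++ compare-and-set: the holder's field is rewritten to the marked (to-space) reference only if it still contains the value that was originally loaded, and a concurrent writer is allowed to win, exactly as the comment above describes. A minimal sketch, modeling the field as a std::atomic (names assumed):

    #include <atomic>

    // After the mark call, `old_ref` (saved in temp1_ above) is the value
    // originally loaded from the field and `marked` is the to-space
    // reference returned by the entrypoint. Losing the race to another
    // mutator is fine: the field then already holds a valid reference.
    template <typename Ref>
    void MaybeUpdateField(std::atomic<Ref*>* field, Ref* old_ref, Ref* marked) {
      if (old_ref != marked) {
        // Stands in for the LDREX/SUBS/ITNE/STREX loop in the emitted code.
        field->compare_exchange_strong(old_ref, marked, std::memory_order_relaxed);
      }
    }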
// Slow path generating a read barrier for a heap reference.
@@ -7392,13 +7562,11 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // After loading the reference from `obj.field` into `ref`, query
- // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
- // need to enter the slow path to mark the reference. This
- // optimistic strategy (we expect the GC to not be marking most of
- // the time) does not check `obj`'s lock word (to see if it is a
- // gray object or not), so may sometimes mark an already marked
- // object.
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
@@ -7406,14 +7574,19 @@
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
- // TODO: This temp register is only necessary when
- // `always_update_field` is true; make it optional (like `temp2`).
vixl32::Register temp_reg = RegisterFrom(temp);
// Slow path marking the object `ref` when the GC is marking. The
@@ -7422,18 +7595,37 @@
SlowPathCodeARMVIXL* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+ // only supports address of the form `obj + field_offset`, where
+ // `obj` is a register and `field_offset` is a register pair (of
+ // which only the lower half is used). Thus `offset` and
+ // `scale_factor` above are expected to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ *temp2,
+ /* entrypoint */ temp3);
} else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, ref, /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp3);
}
AddSlowPath(slow_path);
@@ -7443,11 +7635,11 @@
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
- // The reference load.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
// The entrypoint is null when the GC is not marking; this saves one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
}