-rw-r--r--  compiler/optimizing/code_generator_arm.cc       | 590
-rw-r--r--  compiler/optimizing/code_generator_arm.h        |  12
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc     | 659
-rw-r--r--  compiler/optimizing/code_generator_arm64.h      |  14
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  | 616
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h   |  14
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc           |   2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc         |  46
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc      |   2
-rw-r--r--  runtime/lock_word.h                             |   3
-rw-r--r--  runtime/type_lookup_table.h                     |   2
11 files changed, 1361 insertions(+), 599 deletions(-)
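The slow paths introduced below all share the shape spelled out in the in-code comments: load the holder's lock word before the reference, then call the per-register marking entry point only if the holder is gray. As a rough orientation, here is a minimal, self-contained C++ sketch of that control flow; the types, the bit position of the read barrier state, and the function names are illustrative stand-ins, not ART's actual classes or entry points.

#include <cstdint>

// Illustrative stand-ins only; the real types live in the ART runtime.
struct Object { uint32_t monitor; };           // lock word in the object header
using MarkFn = Object* (*)(Object*);           // ReadBarrierMarkRegX-style entry point

constexpr unsigned kRbStateShift = 28;         // assumed bit position; ART's value is in art::LockWord
constexpr uint32_t kGrayState = 1;             // gray == 1, white == 0 (per the static_asserts below)

// Shape of the code emitted by GenerateReferenceLoadWithBakerReadBarrier and the
// new LoadReferenceWithBakerReadBarrier*SlowPath classes in this change.
Object* LoadRefWithBakerReadBarrier(MarkFn mark_entrypoint,  // null <=> GC is not marking
                                    Object* obj,
                                    Object** field_addr) {
  if (mark_entrypoint == nullptr) {
    return *field_addr;                        // fast path: plain reference load
  }
  // Slow path: read the holder's lock word *before* the reference (load-load ordering).
  uint32_t lock_word = obj->monitor;
  Object* ref = *field_addr;                   // original reference load
  bool is_gray = ((lock_word >> kRbStateShift) & 1u) == kGrayState;
  return is_gray ? mark_entrypoint(ref) : ref; // mark only when the holder is gray
}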
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 511bd9b7ef..2b0ab3e20e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -636,10 +636,75 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM {
+ protected:
+ ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ Register ref_reg = ref_.AsRegister<Register>();
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ blx(entrypoint_.AsRegister<Register>());
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -650,13 +715,13 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
-class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
public:
ReadBarrierMarkSlowPathARM(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -664,15 +729,77 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ if (kIsDebugBuild) {
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ }
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARM";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK_NE(ref_reg, temp_);
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -686,145 +813,202 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
//
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation (the 32-bit lock word shifted
+ // right by 32 yields 0), but its value now depends on `temp`.
+ __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
//
- // rX <- ReadBarrierMarkRegX(rX)
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
//
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ blx(entrypoint_.AsRegister<Register>());
- } else {
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+ // The register containing the object holding the marked object reference field.
+ Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`.
+ Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+ : public ReadBarrierMarkSlowPathBaseARM {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp1,
- Register temp2)
- : SlowPathCodeARM(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp1,
+ Register temp2,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
temp1_(temp1),
temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM";
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK_NE(ref_reg, temp1_);
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegisterPair()) << field_offset;
__ Bind(GetEntryLabel());
- // Save the old reference.
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp1`.
+ __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(temp1_, IP);
__ Mov(temp1_, ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- Label done;
__ cmp(temp1_, ShifterOperand(ref_reg));
- __ b(&done, EQ);
+ __ b(GetExitLabel(), EQ);
// Update the holder's field atomically. This may fail if the
// mutator updates it before us, but it's OK. This is achieved
@@ -837,7 +1021,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register offset = field_offset.AsRegisterPairLow<Register>();
Register expected = temp1_;
Register value = ref_reg;
Register tmp_ptr = IP; // Pointer to actual memory.
@@ -887,22 +1071,27 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
}
}
- __ Bind(&done);
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const Register temp1_;
+ // A temporary register used in the implementation of the CAS, to
+ // update the object's reference field.
const Register temp2_;
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM);
};
// Slow path generating a read barrier for a heap reference.
@@ -7183,14 +7372,35 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+ // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -7201,21 +7411,6 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- Location temp = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
// The entrypoint is null when the GC is not marking; this saves one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
@@ -7286,51 +7481,101 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp3` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp3` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
- Register ref_reg = ref.AsRegister<Register>();
Register temp_reg = temp.AsRegister<Register>();
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp3`.
+ Location temp3 = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
+ // supports address of the form `obj + field_offset`, where `obj`
+ // is a register and `field_offset` is a register pair (of which
+ // only the lower half is used). Thus `offset` and `scale_factor`
+ // above are expected to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ *temp2,
+ /* entrypoint */ temp3);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp3);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
+ AddSlowPath(slow_path);
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp_reg`.
- __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking; this saves one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check) {
+ Register ref_reg = ref.AsRegister<Register>();
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
// intrinsics.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
(index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
@@ -7347,41 +7592,16 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
__ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
__ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
}
- // Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
}
- AddSlowPath(slow_path);
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
- __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS.
- __ Bind(slow_path->GetExitLabel());
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
}
void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
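The GenerateRawReferenceLoad helper factored out above (declared in the header change below) always addresses the reference as *(obj + offset + (index << scale_factor)), with the index part omitted for plain field loads. A small sketch of that addressing, using plain integers instead of ART's Location/Register machinery:

#include <cstdint>

// Illustrative only: mirrors the address computation of the raw reference load,
// ref = *(obj + offset + (index << scale_factor)); `index` is absent for plain
// field loads and present for array and Unsafe-style loads.
uintptr_t RefAddress(uintptr_t obj, uint32_t offset,
                     bool has_index, uintptr_t index, unsigned scale_factor) {
  uintptr_t addr = obj + offset;
  if (has_index) {
    addr += index << scale_factor;   // e.g. TIMES_4 for 32-bit heap references
  }
  return addr;
}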
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index e993756b3b..f081a910ee 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -520,9 +520,6 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
Location temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
-
// Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
// GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
//
@@ -545,6 +542,15 @@ class CodeGeneratorARM : public CodeGenerator {
bool always_update_field = false,
Register* temp2 = nullptr);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
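Both back ends make the marking decision the same way in GenerateGcRootFieldLoad (above for ARM, below for ARM64): instead of reading Thread::Current()->GetIsGcMarking(), they load the per-register pReadBarrierMarkRegN entry point, which the runtime leaves null while the GC is not marking, and branch on it, so a single load provides both the flag and the call target. A hedged C++ sketch of that root-load pattern; ThreadStub and read_barrier_mark_reg are made-up names standing in for the real thread-local entry point slots.

// Illustrative stand-ins; the real thread object exposes one
// pReadBarrierMarkRegN entry point per core register.
struct Obj;
using MarkFn = Obj* (*)(Obj*);
struct ThreadStub {
  MarkFn read_barrier_mark_reg[32];  // entries are null while the GC is not marking
};

Obj* LoadGcRoot(ThreadStub* self, Obj** root_addr, int root_reg) {
  MarkFn entrypoint = self->read_barrier_mark_reg[root_reg];  // temp = TR[entry_point_offset]
  Obj* root = *root_addr;                                     // original GC root load
  if (entrypoint != nullptr) {                                // <=> GetIsGcMarking()
    root = entrypoint(root);                                  // root = ReadBarrier::Mark(root)
  }
  return root;
}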
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f5038fb1c0..7d1ae7d28b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -633,10 +633,73 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
}
}
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
+ protected:
+ ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- ref
+ // W0 <- ReadBarrierMark(W0)
+ // ref <- W0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blr(XRegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -647,15 +710,13 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
-class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
public:
ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM64(instruction),
- ref_(ref),
- entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -666,12 +727,75 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(locations->CanCall());
DCHECK(ref_.IsRegister()) << ref_;
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ use_load_acquire_(use_load_acquire),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(obj_.IsW());
+ DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -684,82 +808,138 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(!(instruction_->IsArrayGet() &&
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP0 nor IP1, as we may use them to emit the reference
+ // load (in the call to GenerateRawReferenceLoad below), and we
+ // need the lock word to still be in `temp_` after the reference
+ // load.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(ref_.reg(), LR);
- DCHECK_NE(ref_.reg(), WSP);
- DCHECK_NE(ref_.reg(), WZR);
- // IP0 is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_.reg(), IP0);
- DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in W0):
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // W0 <- ref
- // W0 <- ReadBarrierMark(W0)
- // ref <- W0
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
//
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->GenerateRawReferenceLoad(instruction_,
+ ref_,
+ obj_,
+ offset_,
+ index_,
+ scale_factor_,
+ /* needs_null_check */ false,
+ use_load_acquire_);
+
+ // Mark the object `ref` when `obj` is gray.
//
- // rX <- ReadBarrierMarkRegX(rX)
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
//
- if (entrypoint_.IsValid()) {
- arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blr(XRegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if it is already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+ // The register containing the object holding the marked object reference field.
+ Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ size_t scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // Should this reference load use Load-Acquire semantics?
+ bool use_load_acquire_;
+ // A temporary register used to hold the lock word of `obj_`.
+ Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM64 above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+ : public ReadBarrierMarkSlowPathBaseARM64 {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp)
- : SlowPathCodeARM64(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ use_load_acquire_(use_load_acquire),
temp_(temp) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
const char* GetDescription() const OVERRIDE {
- return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -768,64 +948,90 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(locations->CanCall());
DCHECK(ref_.IsRegister()) << ref_;
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK(obj_.IsW());
+ DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegister()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, 0u);
+ DCHECK_EQ(use_load_acquire_, false);
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegister()) << field_offset;
+
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP0 nor IP1, as we may use them to emit the reference
+ // load (in the call to GenerateRawReferenceLoad below), and we
+ // need the lock word to still be in `temp_` after the reference
+ // load.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ DCHECK_NE(LocationFrom(temp_).reg(), IP1);
__ Bind(GetEntryLabel());
- // Save the old reference.
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->GenerateRawReferenceLoad(instruction_,
+ ref_,
+ obj_,
+ offset_,
+ index_,
+ scale_factor_,
+ /* needs_null_check */ false,
+ use_load_acquire_);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(LocationFrom(temp_).reg(), IP0);
__ Mov(temp_.W(), ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(ref_.reg(), LR);
- DCHECK_NE(ref_.reg(), WSP);
- DCHECK_NE(ref_.reg(), WZR);
- // IP0 is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_.reg(), IP0);
- DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in W0):
- //
- // W0 <- ref
- // W0 <- ReadBarrierMark(W0)
- // ref <- W0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDXR/CMP/BNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- vixl::aarch64::Label done;
__ Cmp(temp_.W(), ref_reg);
- __ B(eq, &done);
+ __ B(eq, GetExitLabel());
// Update the holder's field atomically. This may fail if the
// mutator updates it before us, but it's OK. This is achieved
@@ -838,7 +1044,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
// Convenience aliases.
Register base = obj_.W();
- Register offset = XRegisterFrom(field_offset_);
+ Register offset = XRegisterFrom(field_offset);
Register expected = temp_.W();
Register value = ref_reg;
Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
@@ -882,21 +1088,26 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
}
}
- __ Bind(&done);
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ size_t scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // Should this reference load use Load-Acquire semantics?
+ bool use_load_acquire_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const Register temp_;
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
};
// Slow path generating a read barrier for a heap reference.
@@ -2425,6 +2636,9 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary::kNoCall);
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -2460,7 +2674,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
- Register temp = temps.AcquireW();
+ Register temp = WRegisterFrom(locations->GetTemp(0));
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -5604,14 +5818,35 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+    // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Register temp = lr;
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, root, /* entrypoint */ LocationFrom(temp));
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp, MemOperand(tr, entry_point_offset));
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
@@ -5626,20 +5861,6 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- Register temp = lr;
-
- // Slow path marking the GC root `root`. The entrypoint will alrady be loaded in temp.
- SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction,
- root,
- LocationFrom(temp));
- codegen_->AddSlowPath(slow_path);
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp, MemOperand(tr, entry_point_offset));
    // The entrypoint is null when the GC is not marking; this prevents one load
    // compared to checking GetIsGcMarking.
__ Cbnz(temp, slow_path->GetEntryLabel());
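
For readers following the new GC root strategy, here is a hedged restatement in plain C++ of what the emitted fast path and ReadBarrierMarkSlowPathARM64 together do; `MarkFn`, `Object` and `LoadGcRoot` are illustrative names, not ART code:

    struct Object;                        // stand-in for mirror::Object
    using MarkFn = Object* (*)(Object*);  // stand-in for a ReadBarrierMarkRegX entry point

    Object* LoadGcRoot(MarkFn mark_entrypoint, Object** root_address) {
      Object* root = *root_address;       // Original reference load.
      if (mark_entrypoint != nullptr) {   // <=> Thread::Current()->GetIsGcMarking()
        root = mark_entrypoint(root);     // root = ReadBarrier::Mark(root)
      }
      return root;
    }
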
@@ -5741,54 +5962,103 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- MacroAssembler* masm = GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
-
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Register temp2 = lr;
+ Location temp2_loc = LocationFrom(temp2);
+ SlowPathCodeARM64* slow_path;
+ if (always_update_field) {
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+ // only supports address of the form `obj + field_offset`, where
+ // `obj` is a register and `field_offset` is a register. Thus
+ // `offset` and `scale_factor` above are expected to be null in
+ // this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u); /* "times 1" */
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
+ }
+ AddSlowPath(slow_path);
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+  // The entrypoint is null when the GC is not marking; this prevents one load
+  // compared to checking GetIsGcMarking.
+ __ Cbnz(temp2, slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(
+ instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(obj.IsW());
Primitive::Type type = Primitive::kPrimNot;
Register ref_reg = RegisterFrom(ref, type);
- DCHECK(obj.IsW());
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
-
- {
- // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- // /* int32_t */ monitor = obj->monitor_
- __ Ldr(temp, HeapOperand(obj, monitor_offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
+ // If needed, vixl::EmissionCheckScope guards are used to ensure
+ // that no pools are emitted between the load (macro) instruction
+ // and MaybeRecordImplicitNullCheck.
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -5803,59 +6073,50 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
<< instruction->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, 0u);
- DCHECK_EQ(needs_null_check, 0u);
- // /* HeapReference<Object> */ ref = *(obj + index)
+ DCHECK_EQ(needs_null_check, false);
+ // /* HeapReference<mirror::Object> */ ref = *(obj + index)
MemOperand field = HeapOperand(obj, XRegisterFrom(index));
LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
} else {
- // ArrayGet and UnsafeGetObject intrinsics cases.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      // ArrayGet, UnsafeGetObject and UnsafeCASObject intrinsics cases.
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
Load(type, ref_reg, HeapOperand(obj, computed_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
} else {
- Register temp3 = temps.AcquireW();
- __ Add(temp3, obj, offset);
- Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
- temps.Release(temp3);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireW();
+ __ Add(temp, obj, offset);
+ {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
}
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
MemOperand field = HeapOperand(obj, offset);
if (use_load_acquire) {
- LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+ // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
+ LoadAcquire(instruction, ref_reg, field, needs_null_check);
} else {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
Load(type, ref_reg, field);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
}
}
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM64* slow_path;
- if (always_update_field) {
- // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
- // address of the form `obj + field_offset`, where `obj` is a
- // register and `field_offset` is a register. Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
- instruction, ref, obj, /* field_offset */ index, temp);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
- }
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
}
void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
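
Putting the ARM64 pieces together, the generated code now follows the control flow sketched below. This is self-contained, illustrative C++ only (made-up `Object` layout and an assumed lock word bit position), meant to restate the strategy rather than mirror the emitted instructions:

    #include <cstdint>

    struct Object {
      uint32_t monitor;                   // lock word, holding the read barrier state bit
      Object* field;                      // the reference being loaded
    };
    using MarkFn = Object* (*)(Object*);  // stand-in for a ReadBarrierMarkRegX entry point

    constexpr uint32_t kRbStateShift = 28;  // assumed bit position, for illustration only
    constexpr uint32_t kGrayState = 1;

    Object* LoadWithBakerReadBarrier(MarkFn mark_entrypoint, Object* holder) {
      if (mark_entrypoint == nullptr) {   // <=> !Thread::Current()->GetIsGcMarking()
        return holder->field;             // Fast path: raw reference load only.
      }
      // Slow path: read the lock word *before* the reference (load-load ordering).
      uint32_t lock_word = holder->monitor;
      Object* ref = holder->field;
      bool is_gray = ((lock_word >> kRbStateShift) & 1u) == kGrayState;
      return is_gray ? mark_entrypoint(ref) : ref;
    }
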
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 24a602400e..7471cd5f12 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -616,8 +616,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
Location index,
vixl::aarch64::Register temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
//
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
@@ -636,6 +636,16 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool use_load_acquire,
bool always_update_field = false);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
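
The `use_load_acquire` parameter of the new GenerateRawReferenceLoad declaration distinguishes volatile reference loads from plain ones. A rough C++ analogue, using standard atomics as stand-ins for the LDAR/LDR instructions the code generator actually emits:

    #include <atomic>

    struct Object;  // stand-in for mirror::Object

    Object* LoadReference(const std::atomic<Object*>* field, bool use_load_acquire) {
      return use_load_acquire
          ? field->load(std::memory_order_acquire)   // volatile get, e.g. UnsafeGetObjectVolatile
          : field->load(std::memory_order_relaxed);  // plain field or array get
    }
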
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 58bf2de70b..180db923bf 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -657,52 +657,25 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
- public:
- ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
+ protected:
+ ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
: SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
- __ Bind(GetEntryLabel());
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -732,53 +705,258 @@ class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
__ Blx(RegisterFrom(entrypoint_));
} else {
+ // Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
- __ B(GetExitLabel());
}
- private:
// The location (register) of the marked object reference.
const Location ref_;
// The location of the entrypoint if already loaded.
const Location entrypoint_;
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking.
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
+ public:
+ ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ B(GetExitLabel());
+ }
+
+ private:
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ vixl32::Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ vixl32::Register ref_reg = RegisterFrom(ref_);
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+ // The read barrier instrumentation of object ArrayGet
+ // instructions does not support the HIntermediateAddress
+ // instruction.
+ DCHECK(!(instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP, as we may use it to emit the reference load (in the
+ // call to GenerateRawReferenceLoad below), and we need the lock
+ // word to still be in `temp_` after the reference load.
+ DCHECK(!temp_.Is(ip));
+
+ __ Bind(GetEntryLabel());
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+    // on `temp_`.
+ __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+ __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The register containing the object holding the marked object reference field.
+ vixl32::Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`.
+ vixl32::Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+ : public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- Location field_offset,
- vixl32::Register temp1,
- vixl32::Register temp2)
- : SlowPathCodeARMVIXL(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ vixl32::Register temp1,
+ vixl32::Register temp2,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
temp1_(temp1),
temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
const char* GetDescription() const OVERRIDE {
- return "ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL";
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -786,64 +964,83 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
vixl32::Register ref_reg = RegisterFrom(ref_);
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegisterPair()) << field_offset;
+
+ // Temporary register `temp1_`, used to store the lock word, must
+ // not be IP, as we may use it to emit the reference load (in the
+ // call to GenerateRawReferenceLoad below), and we need the lock
+ // word to still be in `temp1_` after the reference load.
+ DCHECK(!temp1_.Is(ip));
__ Bind(GetEntryLabel());
- // Save the old reference.
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+    // on `temp1_`.
+ __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+ __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK(!temp1_.Is(ip));
__ Mov(temp1_, ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
- DCHECK(!ref_reg.Is(sp));
- DCHECK(!ref_reg.Is(lr));
- DCHECK(!ref_reg.Is(pc));
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK(!ref_reg.Is(ip));
- DCHECK(ref_reg.IsRegister()) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- vixl32::Label done;
__ Cmp(temp1_, ref_reg);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, GetExitLabel());
    // Update the holder's field atomically. This may fail if the
    // mutator updates it before us, but it's OK. This is achieved
@@ -857,7 +1054,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- vixl32::Register offset = LowRegisterFrom(field_offset_);
+ vixl32::Register offset = LowRegisterFrom(field_offset);
vixl32::Register expected = temp1_;
vixl32::Register value = ref_reg;
vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory.
@@ -913,22 +1110,27 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
}
}
- __ Bind(&done);
__ B(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const vixl32::Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const vixl32::Register temp1_;
+ // A temporary register used in the implementation of the CAS, to
+ // update the object's reference field.
const vixl32::Register temp2_;
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
};
// Slow path generating a read barrier for a heap reference.
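
Both new VIXL slow paths reduce the gray check to testing a single lock word bit; the emitted Thumb code shifts that bit into the carry flag with LSRS because LSRS has a 16-bit encoding, unlike TST with an immediate. A trivial C++ restatement, with the bit position assumed for illustration:

    #include <cstdint>

    constexpr uint32_t kRbStateShift = 28;  // assumed bit position, for illustration only
    constexpr uint32_t kGrayState = 1;

    bool IsGray(uint32_t lock_word) {
      // LSRS temp, lock_word, #(kRbStateShift + 1) shifts this same bit into the carry flag.
      return ((lock_word >> kRbStateShift) & 1u) == kGrayState;
    }
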
@@ -7261,14 +7463,35 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+    // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -7279,21 +7502,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
    // The entrypoint is null when the GC is not marking; this prevents one load
    // compared to checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
@@ -7364,55 +7572,114 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp3` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp3` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+  //   temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
- vixl32::Register ref_reg = RegisterFrom(ref);
vixl32::Register temp_reg = RegisterFrom(temp);
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp3`.
+ Location temp3 = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+ // only supports address of the form `obj + field_offset`, where
+ // `obj` is a register and `field_offset` is a register pair (of
+ // which only the lower half is used). Thus `offset` and
+ // `scale_factor` above are expected to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ *temp2,
+ /* entrypoint */ temp3);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp3);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
+ AddSlowPath(slow_path);
+
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking; this prevents one load
+  // compared to checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp_reg`.
- __ Add(obj, obj, Operand(temp_reg, ShiftType::LSR, 32));
+void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check) {
+ Primitive::Type type = Primitive::kPrimNot;
+ vixl32::Register ref_reg = RegisterFrom(ref, type);
+
+ // If needed, vixl::EmissionCheckScope guards are used to ensure
+ // that no pools are emitted between the load (macro) instruction
+ // and MaybeRecordImplicitNullCheck.
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
// intrinsics.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
(Int32ConstantFrom(index) << scale_factor) + offset;
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
} else {
// Handle the special case of the
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -7422,46 +7689,27 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
? LowRegisterFrom(index)
: RegisterFrom(index);
UseScratchRegisterScope temps(GetVIXLAssembler());
- const vixl32::Register temp3 = temps.Acquire();
- __ Add(temp3, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp3, offset);
+ vixl32::Register temp = temps.Acquire();
+ __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
+ {
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
}
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARMVIXL* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, ref);
- }
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
- __ B(cs, slow_path->GetEntryLabel()); // Carry flag is the last bit shifted out by LSRS.
- __ Bind(slow_path->GetExitLabel());
}
void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
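
All of the above relies on the runtime publishing one mark entrypoint per core register when marking starts, and clearing them when it stops, so that a null entrypoint can stand in for GetIsGcMarking() being false. A hedged sketch of that publishing side, with a made-up container rather than ART's actual Thread entrypoint fields:

    #include <array>

    struct Object;
    using MarkFn = Object* (*)(Object*);

    struct FakeThreadEntrypoints {
      std::array<MarkFn, 16> read_barrier_mark_reg;  // one slot per ARM core register

      void SetIsGcMarking(bool marking, MarkFn mark_impl) {
        for (MarkFn& slot : read_barrier_mark_reg) {
          slot = marking ? mark_impl : nullptr;  // null <=> GetIsGcMarking() == false
        }
      }
    };
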
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 92e922d8f9..5ff7dd69e7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -45,6 +45,11 @@ static constexpr bool kArmUseVIXL32 = true;
namespace art {
namespace arm {
+// This constant is used as an approximate margin when emission of veneer and literal pools
+// must be blocked.
+static constexpr int kMaxMacroInstructionSizeInBytes =
+ 15 * vixl::aarch32::kMaxInstructionSizeInBytes;
+
static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = {
vixl::aarch32::r1,
vixl::aarch32::r2,
@@ -625,6 +630,15 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
bool always_update_field = false,
vixl::aarch32::Register* temp2 = nullptr);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 86000e9356..28095c4d3f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1947,6 +1947,8 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
}
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// The base destination address is computed later, as `temp2` is
// used for intermediate computations.
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6c3938c1a9..934ba1b9fb 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -853,7 +853,6 @@ static void GenUnsafeGet(HInvoke* invoke,
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
- MacroAssembler* masm = codegen->GetVIXLAssembler();
Location base_loc = locations->InAt(1);
Register base = WRegisterFrom(base_loc); // Object pointer.
Location offset_loc = locations->InAt(2);
@@ -863,8 +862,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
- UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireW();
+ Register temp = WRegisterFrom(locations->GetTemp(0));
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
@@ -901,6 +899,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -2381,9 +2382,14 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// Temporary register IP0, obtained from the VIXL scratch register
// pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
// (because that register is clobbered by ReadBarrierMarkRegX
- // entry points). Get an extra temporary register from the
- // register allocator.
+ // entry points). It cannot be used in calls to
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // either. For these reasons, get a third extra temporary register
+ // from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ } else {
+ // Cases other than Baker read barriers: the third temporary will
+ // be acquired from the VIXL scratch register pool.
}
}
@@ -2494,11 +2500,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
- // Note: Because it is acquired from VIXL's scratch register pool,
- // `temp3` might be IP0, and thus cannot be used as `ref` argument
- // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
- // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
- Register temp3 = temps.AcquireW();
+ Register temp3;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ temp3 = WRegisterFrom(locations->GetTemp(2));
+ } else {
+ temp3 = temps.AcquireW();
+ }
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
@@ -2704,19 +2711,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // Temporary register IP0, obtained from the VIXL scratch
- // register pool as `temp3`, cannot be used in
- // ReadBarrierSystemArrayCopySlowPathARM64 (because that
- // register is clobbered by ReadBarrierMarkRegX entry points).
- // So another temporary register allocated by the register
- // allocator instead.
- DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
- src_stop_addr = XRegisterFrom(locations->GetTemp(2));
- } else {
- src_stop_addr = temp3.X();
- }
+ Register src_stop_addr = temp3.X();
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2732,6 +2727,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
//
@@ -2758,10 +2755,11 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Cmp(src_curr_addr, src_stop_addr);
__ B(&done, eq);
- Register tmp = temps.AcquireW();
// Make sure `tmp` is not IP0, as it is clobbered by
// ReadBarrierMarkRegX entry points in
// ReadBarrierSystemArrayCopySlowPathARM64.
+ temps.Exclude(ip0);
+ Register tmp = temps.AcquireW();
DCHECK_NE(LocationFrom(tmp).reg(), IP0);
// /* int32_t */ monitor = src->monitor_
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index aa89deae34..60bcf2cfd5 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2265,6 +2265,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
}
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// The base destination address is computed later, as `temp2` is
// used for intermediate computations.
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 2f2565b9d0..edc64f35a1 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -57,7 +57,8 @@ class Monitor;
* |10|9|87654321098765432109876543210|
* |11|0| ForwardingAddress |
*
- * The rb bits store the read barrier state.
+ * The `r` bit stores the read barrier state.
+ * The `m` bit stores the mark state.
*/
class LockWord {
public:
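
The two new documentation lines correspond to single-bit accessors along the following lines; the bit positions here are assumed for illustration and are not copied from lock_word.h:

    #include <cstdint>

    constexpr uint32_t kReadBarrierBit = 28;  // `r` bit, position assumed for illustration
    constexpr uint32_t kMarkBit = 29;         // `m` bit, position assumed for illustration

    bool ReadBarrierState(uint32_t lock_word) { return (lock_word >> kReadBarrierBit) & 1u; }
    bool MarkBitState(uint32_t lock_word) { return (lock_word >> kMarkBit) & 1u; }
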
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3f6f76f510..fd68deb71c 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -148,7 +148,7 @@ class TypeLookupTable {
return mask_;
}
- // Attempt to set an entry on it's hash' slot. If there is alrady something there, return false.
+ // Attempt to set an entry on its hash's slot. If there is already something there, return false.
// Otherwise return true.
bool SetOnInitialPos(const Entry& entry, uint32_t hash);