Merge "Revert^2 "ARM: Reimplement the UnsafeCASObject intrinsic.""
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d811e07..8c5eafd 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -111,7 +111,7 @@
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
: assembler_(assembler), rd_(rd), label_(label) {
- ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+ DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope.
adr_location_ = assembler->GetCursorOffset();
assembler->adr(EncodingSize(Wide), rd, label);
}
@@ -715,294 +715,6 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
- protected:
- ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
- : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
-
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
- vixl32::Register ref_reg = RegisterFrom(ref_);
-
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
- DCHECK(!ref_reg.Is(sp));
- DCHECK(!ref_reg.Is(lr));
- DCHECK(!ref_reg.Is(pc));
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK(!ref_reg.Is(ip));
- DCHECK(ref_reg.IsRegister()) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blx(RegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
- }
-
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking.
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp1_(temp1),
- temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
- DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking and field updating slow path: "
- << instruction_->DebugName();
- DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
- Location field_offset = index_;
- DCHECK(field_offset.IsRegisterPair()) << field_offset;
-
- // Temporary register `temp1_`, used to store the lock word, must
- // not be IP, as we may use it to emit the reference load (in the
- // call to GenerateRawReferenceLoad below), and we need the lock
- // word to still be in `temp1_` after the reference load.
- DCHECK(!temp1_.Is(ip));
-
- __ Bind(GetEntryLabel());
-
- // The implementation is:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
- // }
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
-
- // Save the old value of the reference before marking it.
- // Note that we cannot use IP to save the old reference, as IP is
- // used internally by the ReadBarrierMarkRegX entry point, and we
- // need the old reference after the call to that entry point.
- DCHECK(!temp1_.Is(ip));
- __ Mov(temp1_, ref_reg);
-
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- // If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
- //
- // Note that this field could also hold a different object, if
- // another thread had concurrently changed it. In that case, the
- // LDREX/CMP/BNE sequence of instructions in the compare-and-set
- // (CAS) operation below would abort the CAS, leaving the field
- // as-is.
- __ Cmp(temp1_, ref_reg);
- __ B(eq, GetExitLabel());
-
- // Update the the holder's field atomically. This may fail if
- // mutator updates before us, but it's OK. This is achieved
- // using a strong compare-and-set (CAS) operation with relaxed
- // memory synchronization ordering, where the expected value is
- // the old reference and the desired value is the new reference.
-
- UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
- // Convenience aliases.
- vixl32::Register base = obj_;
- // The UnsafeCASObject intrinsic uses a register pair as field
- // offset ("long offset"), of which only the low part contains
- // data.
- vixl32::Register offset = LowRegisterFrom(field_offset);
- vixl32::Register expected = temp1_;
- vixl32::Register value = ref_reg;
- vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory.
- vixl32::Register tmp = temp2_; // Value in memory.
-
- __ Add(tmp_ptr, base, offset);
-
- if (kPoisonHeapReferences) {
- arm_codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- arm_codegen->GetAssembler()->PoisonHeapReference(value);
- }
- }
-
- // do {
- // tmp = [r_ptr] - expected;
- // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-
- vixl32::Label loop_head, comparison_failed, exit_loop;
- __ Bind(&loop_head);
- __ Ldrex(tmp, MemOperand(tmp_ptr));
- __ Cmp(tmp, expected);
- __ B(ne, &comparison_failed, /* far_target */ false);
- __ Strex(tmp, value, MemOperand(tmp_ptr));
- __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false);
- __ B(&loop_head);
- __ Bind(&comparison_failed);
- __ Clrex();
- __ Bind(&exit_loop);
-
- if (kPoisonHeapReferences) {
- arm_codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- arm_codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
- }
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- const vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
- const vixl32::Register temp1_;
- // A temporary register used in the implementation of the CAS, to
- // update the object's reference field.
- const vixl32::Register temp2_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
-};
-
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
@@ -2295,6 +2007,14 @@
}
break;
}
+ case BakerReadBarrierKind::kUnsafeCas: {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+ // ADD (register), encoding T3, with correct root_reg as Rd; the Rn and Rm fields are masked out.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8));
+ break;
+ }
default:
LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
UNREACHABLE();
@@ -8626,7 +8346,11 @@
bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
+ size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(mr, Operand(0));
@@ -8636,7 +8360,7 @@
ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
EmitBakerReadBarrierBne(custom_data);
- __ Bind(&return_address);
+ __ bind(&return_address);
DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
: BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
@@ -8658,6 +8382,32 @@
MaybeGenerateMarkingRegisterCheck(/* code */ 19);
}
+void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier(
+ vixl::aarch32::Register old_value,  // Out: receives `adjusted_old_value + expected`.
+ vixl::aarch32::Register adjusted_old_value,  // In: the old value minus `expected`.
+ vixl::aarch32::Register expected) {  // In: the expected reference.
+ DCHECK(kEmitCompilerReadBarrier);  // Only valid when read barriers are emitted...
+ DCHECK(kUseBakerReadBarrier);  // ...and they are Baker read barriers.
+
+ // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR.
+ uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode());
+
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);  // CMP is counted as 16-bit only for a low `mr`.
+ size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);  // Scope sized for the four instructions below.
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);  // ADR lr, return_address.
+ __ cmp(mr, Operand(0));  // Sets the flags consumed by the BNE emitted below.
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();  // Cursor offset of the ADD.
+ __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected)); // Preserves flags.
+ EmitBakerReadBarrierBne(custom_data);
+ __ bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),  // ADD-to-return-address distance...
+ BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET);  // ...must equal this constant.
+}
+
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8698,9 +8448,14 @@
EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
{
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ size_t narrow_instructions =
+ /* CMP */ (mr.IsLow() ? 1u : 0u) +
+ /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
+ size_t wide_instructions =
+ /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(mr, Operand(0));
@@ -8719,7 +8474,7 @@
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
}
- __ Bind(&return_address);
+ __ bind(&return_address);
DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
: BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
@@ -8791,9 +8546,12 @@
__ Add(data_reg, obj, Operand(data_offset));
{
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
+ size_t wide_instructions =
+ /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(mr, Operand(0));
@@ -8805,127 +8563,13 @@
if (kPoisonHeapReferences) {
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
- __ Bind(&return_address);
+ __ bind(&return_address);
DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
}
MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
}
-void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- Location field_offset,
- Location temp,
- bool needs_null_check,
- vixl32::Register temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
-
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to update the reference field within `obj`. Then, in the
- // slow path, check the gray bit in the lock word of the reference's
- // holder (`obj`) to decide whether to mark `ref` and update the
- // field or not.
- //
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
- // }
- // }
-
- vixl32::Register temp_reg = RegisterFrom(temp);
-
- // Slow path updating the object reference at address `obj + field_offset`
- // when the GC is marking. The entrypoint will be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- /* offset */ 0u,
- /* index */ field_offset,
- /* scale_factor */ ScaleFactor::TIMES_1,
- needs_null_check,
- temp_reg,
- temp2);
- AddSlowPath(slow_path);
-
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: nothing to do (the field is
- // up-to-date, and we don't need to load the reference).
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ 23);
-}
-
-void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check) {
- DataType::Type type = DataType::Type::kReference;
- vixl32::Register ref_reg = RegisterFrom(ref, type);
-
- // If needed, vixl::EmissionCheckScope guards are used to ensure
- // that no pools are emitted between the load (macro) instruction
- // and MaybeRecordImplicitNullCheck.
-
- if (index.IsValid()) {
- // Load types involving an "index": ArrayGet,
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics.
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
- if (index.IsConstant()) {
- size_t computed_offset =
- (Int32ConstantFrom(index) << scale_factor) + offset;
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- } else {
- // Handle the special case of the
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics, which use a register pair as index ("long
- // offset"), of which only the low part contains data.
- vixl32::Register index_reg = index.IsRegisterPair()
- ? LowRegisterFrom(index)
- : RegisterFrom(index);
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
- __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
- {
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- }
- } else {
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
-
- // Object* ref = ref_addr->AsMirrorPtr()
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-}
-
void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
@@ -9215,7 +8859,7 @@
}
void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
- ExactAssemblyScope eas(GetVIXLAssembler(), 1 * k32BitT32InstructionSizeInBytes);
+ DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
if (Runtime::Current()->UseJitCompilation()) {
auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
vixl::aarch32::Label* slow_path_entry = &it->second.label;
@@ -9774,7 +9418,8 @@
__ Bx(ep_reg); // Jump to the entrypoint's array switch case.
break;
}
- case BakerReadBarrierKind::kGcRoot: {
+ case BakerReadBarrierKind::kGcRoot:
+ case BakerReadBarrierKind::kUnsafeCas: {
// Check if the reference needs to be marked and if so (i.e. not null, not marked yet
// and it does not have a forwarding address), call the correct introspection entrypoint;
// otherwise return the reference (or the extracted forwarding address).
@@ -9802,10 +9447,14 @@
__ B(hs, &forwarding_address);
vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
- // to art_quick_read_barrier_mark_introspection_gc_roots.
- int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
- ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
- : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ // to one of art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}.
+ DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide);
+ int32_t entrypoint_offset =
+ (kind == BakerReadBarrierKind::kGcRoot)
+ ? (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET;
__ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
__ Mov(ip, root_reg);
__ Bx(ep_reg);
@@ -9851,6 +9500,12 @@
DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
BakerReadBarrierSecondRegField::Decode(encoded_data));
break;
+ case BakerReadBarrierKind::kUnsafeCas:
+ oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ break;
}
*debug_name = oss.str();
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 2fd18ca..cb131a7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -622,6 +622,11 @@
vixl::aarch32::Register obj,
uint32_t offset,
ReadBarrierOption read_barrier_option);
+ // Generate ADD for UnsafeCASObject to reconstruct the old value in `old_value` from
+ // `adjusted_old_value` (i.e. `old_value - expected`) and mark it with a Baker read barrier.
+ void GenerateUnsafeCasOldValueAddWithBakerReadBarrier(vixl::aarch32::Register old_value,
+ vixl::aarch32::Register adjusted_old_value,
+ vixl::aarch32::Register expected);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
// Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
@@ -647,35 +652,6 @@
Location temp,
bool needs_null_check);
- // Generate code checking whether the the reference field at the
- // address `obj + field_offset`, held by object `obj`, needs to be
- // marked, and if so, marking it and updating the field within `obj`
- // with the marked value.
- //
- // This routine is used for the implementation of the
- // UnsafeCASObject intrinsic with Baker read barriers.
- //
- // This method has a structure similar to
- // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
- // `ref` is only as a temporary here, and thus its value should not
- // be used afterwards.
- void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- Location field_offset,
- Location temp,
- bool needs_null_check,
- vixl::aarch32::Register temp2);
-
- // Generate a heap reference load (with no read barrier).
- void GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check);
-
// Emit code checking the status of the Marking Register, and
// aborting the program if MR does not match the value stored in the
// art::Thread object. Code is only emitted in debug mode and if
@@ -772,10 +748,11 @@
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
enum class BakerReadBarrierKind : uint8_t {
- kField, // Field get or array get with constant offset (i.e. constant index).
- kArray, // Array get with index in register.
- kGcRoot, // GC root load.
- kLast = kGcRoot
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kUnsafeCas, // UnsafeCASObject intrinsic.
+ kLast = kUnsafeCas
};
enum class BakerReadBarrierWidth : uint8_t {
@@ -842,6 +819,14 @@
BakerReadBarrierWidthField::Encode(width);
}
+ static uint32_t EncodeBakerReadBarrierUnsafeCasData(uint32_t root_reg) {  // Packs thunk data for kUnsafeCas.
+ CheckValidReg(root_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kUnsafeCas) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |  // The only register carried by this kind.
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |  // No second register.
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);  // This kind is always encoded as wide.
+ }
+
void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
uint32_t encoded_data,
/*out*/ std::string* debug_name);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 53b0aa2..74a779d 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -936,9 +936,7 @@
codegen_);
}
-static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
- HInvoke* invoke,
- DataType::Type type) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
bool can_call = kEmitCompilerReadBarrier &&
kUseBakerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
@@ -948,20 +946,16 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output. Likewise when
- // emitting a (Baker) read barrier, which may call.
- Location::OutputOverlap overlaps =
- ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap;
- locations->SetOut(Location::RequiresRegister(), overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
// Temporary registers used in CAS. In the object case
// (UnsafeCASObject intrinsic), these are also used for
@@ -970,24 +964,92 @@
locations->AddTemp(Location::RequiresRegister()); // Temp 1.
}
+class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
+ : SlowPathCodeARMVIXL(invoke) {}
+
+ const char* GetDescription() const OVERRIDE { return "BakerReadBarrierCasSlowPathARMVIXL"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+ __ Bind(GetEntryLabel());
+
+ LocationSummary* locations = instruction_->GetLocations();
+ vixl32::Register base = InputRegisterAt(instruction_, 1); // Object pointer.
+ vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
+ vixl32::Register expected = InputRegisterAt(instruction_, 3); // Expected.
+ vixl32::Register value = InputRegisterAt(instruction_, 4); // Value.
+
+ vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary.
+
+ // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
+ // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the
+ // process due to lack of other temps suitable for the read barrier.
+ arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
+ __ Cmp(tmp_ptr, expected);
+ __ B(ne, GetExitLabel());  // Marked old value differs from `expected`: the CAS fails.
+
+ // The old value we have read did not match `expected` (which is always a to-space reference)
+ // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
+ // to-space value matched, so the old value must be a from-space reference to the same object.
+ // Do the same CAS loop as the main path but check for both `expected` and the unmarked
+ // old value representing the to-space and from-space references for the same object.
+
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register adjusted_old_value = temps.Acquire(); // For saved `tmp` from main path.
+
+ // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP.
+ __ Add(tmp_ptr, base, offset);
+ __ Mov(adjusted_old_value, tmp);
+
+ // do {
+ // tmp = [tmp_ptr] - expected;
+ // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([tmp_ptr] <- value));
+ // result = (tmp == 0 || tmp == adjusted_old_value);
+
+ vixl32::Label loop_head;
+ __ Bind(&loop_head);
+ __ Ldrex(tmp, MemOperand(tmp_ptr)); // This can now load null stored by another thread.
+ assembler->MaybeUnpoisonHeapReference(tmp);
+ __ Subs(tmp, tmp, expected); // Use SUBS to get non-zero value if both compares fail.
+ {
+ // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
+ ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
+ __ it(ne);
+ __ cmp(ne, tmp, adjusted_old_value);
+ }
+ __ B(ne, GetExitLabel());  // Matches neither value: fail, `tmp` is non-zero.
+ assembler->MaybePoisonHeapReference(value);
+ __ Strex(tmp, value, MemOperand(tmp_ptr));
+ assembler->MaybeUnpoisonHeapReference(value);
+ __ Cmp(tmp, 0);
+ __ B(ne, &loop_head, /* far_target */ false);
+ __ B(GetExitLabel());  // STREX succeeded (`tmp` == 0).
+ }
+};
+
static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
DCHECK_NE(type, DataType::Type::kInt64);
ArmVIXLAssembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(invoke); // Boolean result.
vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
- Location offset_loc = locations->InAt(2);
- vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B).
+ vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
vixl32::Register value = InputRegisterAt(invoke, 4); // Value.
- Location tmp_ptr_loc = locations->GetTemp(0);
- vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory.
- vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory.
+ vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary.
+
+ vixl32::Label loop_exit_label;
+ vixl32::Label* loop_exit = &loop_exit_label;
+ vixl32::Label* failure = &loop_exit_label;
if (type == DataType::Type::kReference) {
// The only read barrier implementation supporting the
@@ -1000,87 +1062,62 @@
codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // Need to make sure the reference stored in the field is a to-space
- // one before attempting the CAS or the CAS could fail incorrectly.
- codegen->UpdateReferenceFieldWithBakerReadBarrier(
- invoke,
- out_loc, // Unused, used only as a "temporary" within the read barrier.
- base,
- /* field_offset */ offset_loc,
- tmp_ptr_loc,
- /* needs_null_check */ false,
- tmp);
+ // If marking, check if the stored reference is a from-space reference to the same
+ // object as the to-space reference `expected`. If so, perform a custom CAS loop.
+ BakerReadBarrierCasSlowPathARMVIXL* slow_path =
+ new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
+ codegen->AddSlowPath(slow_path);
+ failure = slow_path->GetEntryLabel();
+ loop_exit = slow_path->GetExitLabel();
}
}
// Prevent reordering with prior memory operations.
// Emit a DMB ISH instruction instead of an DMB ISHST one, as the
- // latter allows a preceding load to be delayed past the STXR
+ // latter allows a preceding load to be delayed past the STREX
// instruction below.
__ Dmb(vixl32::ISH);
__ Add(tmp_ptr, base, offset);
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- codegen->GetAssembler()->PoisonHeapReference(value);
- }
- }
-
// do {
// tmp = [r_ptr] - expected;
// } while (tmp == 0 && failure([r_ptr] <- r_new_value));
- // result = tmp != 0;
+ // result = tmp == 0;
vixl32::Label loop_head;
__ Bind(&loop_head);
-
__ Ldrex(tmp, MemOperand(tmp_ptr));
-
- __ Subs(tmp, tmp, expected);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ itt(eq);
- __ strex(eq, tmp, value, MemOperand(tmp_ptr));
- __ cmp(eq, tmp, 1);
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(tmp);
}
+ __ Subs(tmp, tmp, expected);
+ __ B(ne, failure, (failure == loop_exit) ? kNear : kBranchWithoutHint);
+ if (type == DataType::Type::kReference) {
+ assembler->MaybePoisonHeapReference(value);
+ }
+ __ Strex(tmp, value, MemOperand(tmp_ptr));
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(value);
+ }
+ __ Cmp(tmp, 0);
+ __ B(ne, &loop_head, /* far_target */ false);
- __ B(eq, &loop_head, /* far_target */ false);
+ __ Bind(loop_exit);
__ Dmb(vixl32::ISH);
- __ Rsbs(out, tmp, 1);
+ // out = tmp == 0.
+ __ Clz(out, tmp);
+ __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ it(cc);
- __ mov(cc, out, 0);
- }
-
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
+ if (type == DataType::Type::kReference) {
+ codegen->MaybeGenerateMarkingRegisterCheck(/* code */ 128);
}
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32);
+ CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
// The only read barrier implementation supporting the
@@ -1089,7 +1126,7 @@
return;
}
- CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference);
+ CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
GenCas(invoke, DataType::Type::kInt32, codegen_);
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 7123ae7..9a01656 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -37,6 +37,9 @@
// The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
// i.e. art_quick_read_barrier_mark_introspection_arrays.
#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
+// The offset from art_quick_read_barrier_mark_introspection to the entrypoint for the
+// UnsafeCASObject intrinsic, i.e. art_quick_read_barrier_mark_introspection_unsafe_cas.
+#define BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET 0x180
// The offset of the reference load LDR from the return address in LR for field loads.
#ifdef USE_HEAP_POISONING
@@ -55,5 +58,7 @@
// The offset of the reference load LDR from the return address in LR for GC root loads.
#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8
#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6
+// The offset of the ADD from the return address in LR for UnsafeCASObject intrinsic.
+#define BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET -8
#endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index b4e9036..2c5465e 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -59,6 +59,7 @@
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow(
mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_unsafe_cas(mirror::Object*);
// Used by soft float.
// Single-precision FP arithmetics.
@@ -113,6 +114,10 @@
reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) -
reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff);
+ intptr_t unsafe_cas_diff =
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_unsafe_cas) -
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+ DCHECK_EQ(BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET, unsafe_cas_diff);
// The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
// We're using the entry to hold a pointer to the introspection entrypoint instead.
qpoints->pReadBarrierMarkReg12 =
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index c86baa1..8f56430 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2575,6 +2575,12 @@
* The thunk also performs all the fast-path checks, so we need just the
* slow path.
*
+ * The UnsafeCASObject intrinsic is similar to the GC roots wide approach
+ * but using ADD (register, T3) instead of the LDR (immediate, T3), so the
+ * destination register is in bits 8-11 rather than 12-15. Therefore it has
+ * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas
+ * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET.
+ *
* The code structure is
* art_quick_read_barrier_mark_introspection: // @0x00
* Up to 32 bytes code for main entrypoint fast-path code for fields
@@ -2610,6 +2616,9 @@
* return switch.
* art_quick_read_barrier_mark_introspection_arrays: // @0x100
* Exactly 128 bytes for array load switch cases (16x2 instructions).
+ * art_quick_read_barrier_mark_introspection_unsafe_cas: // @0x180
+ * UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3
+ * (6 bytes). Loads the return register and jumps to the runtime call.
*/
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
.balign 512
@@ -2669,7 +2678,6 @@
BRBMI_RUNTIME_CALL
b .Lmark_introspection_return_switch
-
.balign 256
.thumb_func
.type art_quick_read_barrier_mark_introspection_arrays, #function
@@ -2677,6 +2685,19 @@
.global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
+
+ .balign 8
+ .thumb_func
+ .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function
+ .hidden art_quick_read_barrier_mark_introspection_unsafe_cas
+ .global art_quick_read_barrier_mark_introspection_unsafe_cas
+art_quick_read_barrier_mark_introspection_unsafe_cas:
+ // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR.
+ // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using
+ // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|,
+ // contains only the register number; the top 4 bits are 0.
+ ldrb rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)]
+ b .Lmark_introspection_runtime_call  // Continue in the shared runtime call code.
END art_quick_read_barrier_mark_introspection
#else // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
ENTRY art_quick_read_barrier_mark_introspection