Revert "Use the "GC is marking" information in compiler read barriers (ARM, ARM64)."
This reverts commit 1372c9f40df1e47bf775f1466bbb96f472b6b9ed.
This change (along with https://android-review.googlesource.com/#/c/342429/)
creates null pointer dereferences.
Bug: 35780827
Bug: 29516974
Change-Id: I2a9c4d0ad8d2ab870c2e0ddbff32152933c77abe
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index c92a056..6bfbe4a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -668,10 +668,6 @@
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
@@ -736,7 +732,6 @@
arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
__ Blx(RegisterFrom(entrypoint_));
} else {
- // Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
// This runtime call does not require a stack map.
@@ -765,10 +760,6 @@
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
@@ -776,15 +767,13 @@
vixl32::Register obj,
Location field_offset,
vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint = Location::NoLocation())
+ vixl32::Register temp2)
: SlowPathCodeARMVIXL(instruction),
ref_(ref),
obj_(obj),
field_offset_(field_offset),
temp1_(temp1),
- temp2_(temp2),
- entrypoint_(entrypoint) {
+ temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -839,16 +828,10 @@
//
// rX <- ReadBarrierMarkRegX(rX)
//
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blx(RegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
// If the new reference is different from the old reference,
// update the field in the holder (`*(obj_ + field_offset_)`).
@@ -945,9 +928,6 @@
const vixl32::Register temp1_;
const vixl32::Register temp2_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
};
@@ -7283,35 +7263,14 @@
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used.
+ // Baker's read barrier are used:
//
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
+ // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // if (temp != null) {
+ // root = temp(root)
// }
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
-
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -7322,6 +7281,21 @@
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
+ // Slow path marking the GC root `root`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
@@ -7392,33 +7366,77 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // After loading the reference from `obj.field` into `ref`, query
- // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
- // need to enter the slow path to mark the reference. This
- // optimistic strategy (we expect the GC to not be marking most of
- // the time) does not check `obj`'s lock word (to see if it is a
- // gray object or not), so may sometimes mark an already marked
- // object.
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // Note that we do not actually check the value of `GetIsGcMarking()`;
- // instead, we load into `temp3` the read barrier mark entry point
- // corresponding to register `ref`. If `temp3` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
- //
- // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
// }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
- // TODO: This temp register is only necessary when
- // `always_update_field` is true; make it optional (like `temp2`).
+ vixl32::Register ref_reg = RegisterFrom(ref);
vixl32::Register temp_reg = RegisterFrom(temp);
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp3`.
- Location temp3 = LocationFrom(lr);
+ // /* int32_t */ monitor = obj->monitor_
+ GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
+ __ Add(obj, obj, Operand(temp_reg, ShiftType::LSR, 32));
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (Int32ConstantFrom(index) << scale_factor) + offset;
+ GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ } else {
+ // Handle the special case of the
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics, which use a register pair as index ("long
+ // offset"), of which only the low part contains data.
+ vixl32::Register index_reg = index.IsRegisterPair()
+ ? LowRegisterFrom(index)
+ : RegisterFrom(index);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp3 = temps.Acquire();
+ __ Add(temp3, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
+ GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp3, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path marking the object `ref` when it is gray.
SlowPathCodeARMVIXL* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
@@ -7430,86 +7448,24 @@
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
} else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, ref, /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, ref);
}
AddSlowPath(slow_path);
- // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
- // The reference load.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+ __ B(cs, slow_path->GetEntryLabel()); // Carry flag is the last bit shifted out by LSRS.
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check) {
- Primitive::Type type = Primitive::kPrimNot;
- vixl32::Register ref_reg = RegisterFrom(ref, type);
-
- // If needed, vixl::EmissionCheckScope guards are used to ensure
- // that no pools are emitted between the load (macro) instruction
- // and MaybeRecordImplicitNullCheck.
-
- if (index.IsValid()) {
- // Load types involving an "index": ArrayGet,
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics.
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
- if (index.IsConstant()) {
- size_t computed_offset =
- (Int32ConstantFrom(index) << scale_factor) + offset;
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- } else {
- // Handle the special case of the
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics, which use a register pair as index ("long
- // offset"), of which only the low part contains data.
- vixl32::Register index_reg = index.IsRegisterPair()
- ? LowRegisterFrom(index)
- : RegisterFrom(index);
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
- __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
- {
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- }
- } else {
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
-
- // Object* ref = ref_addr->AsMirrorPtr()
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-}
-
void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,