Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/code_generator_arm.cc        203
-rw-r--r--  compiler/optimizing/code_generator_arm.h           12
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc       236
-rw-r--r--  compiler/optimizing/code_generator_arm64.h         14
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc    226
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h      14
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc               2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc             2
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc          2
9 files changed, 426 insertions, 285 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 7b84ef83cd..710ca7ad45 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -650,7 +650,7 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
public:
ReadBarrierMarkSlowPathARM(HInstruction* instruction,
@@ -715,6 +715,7 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
__ blx(entrypoint_.AsRegister<Register>());
} else {
+ // Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
// This runtime call does not require a stack map.
@@ -743,6 +744,10 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
public:
ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
@@ -750,13 +755,15 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
Register obj,
Location field_offset,
Register temp1,
- Register temp2)
+ Register temp2,
+ Location entrypoint = Location::NoLocation())
: SlowPathCodeARM(instruction),
ref_(ref),
obj_(obj),
field_offset_(field_offset),
temp1_(temp1),
- temp2_(temp2) {
+ temp2_(temp2),
+ entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -809,10 +816,16 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
//
// rX <- ReadBarrierMarkRegX(rX)
//
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ blx(entrypoint_.AsRegister<Register>());
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
// If the new reference is different from the old reference,
// update the field in the holder (`*(obj_ + field_offset_)`).
@@ -902,6 +915,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
const Register temp1_;
const Register temp2_;
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
};
@@ -7185,14 +7201,35 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+ // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -7203,21 +7240,6 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- Location temp = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
@@ -7288,51 +7310,79 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp3` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp3` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
- Register ref_reg = ref.AsRegister<Register>();
+ // TODO: This temp register is only necessary when
+ // `always_update_field` is true; make it optional (like `temp2`).
Register temp_reg = temp.AsRegister<Register>();
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp3`.
+ Location temp3 = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, ref, /* entrypoint */ temp3);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
+ AddSlowPath(slow_path);
+
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
+ // The reference load.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp_reg`.
- __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
+void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check) {
+ Register ref_reg = ref.AsRegister<Register>();
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
// intrinsics.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
(index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
@@ -7349,41 +7399,16 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
__ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
__ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
}
- // Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
}
- AddSlowPath(slow_path);
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
- __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS.
- __ Bind(slow_path->GetExitLabel());
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
}
void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
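
For orientation, the following is a minimal, compilable C++ model (not part of the patch) of the fast path that GenerateGcRootFieldLoad and GenerateReferenceLoadWithBakerReadBarrier now emit on ARM: the pre-loaded per-register mark entrypoint doubles as the GetIsGcMarking() flag, so the old lock-word (gray bit) test and the artificial address dependency are no longer needed. The Ref, MarkEntrypoint and LoadReference names below are stand-ins, not ART APIs.

  #include <atomic>

  // Stand-ins for ART types; only the control flow is meaningful here.
  using Ref = void*;
  using MarkEntrypoint = Ref (*)(Ref);

  // `entrypoint` models Thread::Current()->pReadBarrierMarkReg ## ref.reg(),
  // which the runtime sets to null whenever the GC is not marking.
  Ref LoadReference(const std::atomic<Ref>* field, MarkEntrypoint entrypoint) {
    Ref ref = field->load(std::memory_order_relaxed);  // original reference load
    if (entrypoint != nullptr) {                        // <=> GetIsGcMarking()
      ref = entrypoint(ref);  // slow path: ref = ReadBarrier::Mark(ref)
    }
    return ref;
  }

As the new comments note, the trade-off is that a reference may occasionally be marked twice while the GC is marking, in exchange for a cheaper fast path when it is not.
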
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index df2dbc74ab..1f68777f88 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -520,9 +520,6 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
Location temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
-
// Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
// GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
//
@@ -545,6 +542,15 @@ class CodeGeneratorARM : public CodeGenerator {
bool always_update_field = false,
Register* temp2 = nullptr);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index edccbd4904..5bdaac2e4a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -647,7 +647,7 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
@@ -743,18 +743,24 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
Location ref,
Register obj,
Location field_offset,
- Register temp)
+ Register temp,
+ Location entrypoint = Location::NoLocation())
: SlowPathCodeARM64(instruction),
ref_(ref),
obj_(obj),
field_offset_(field_offset),
- temp_(temp) {
+ temp_(temp),
+ entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -810,10 +816,16 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
//
// rX <- ReadBarrierMarkRegX(rX)
//
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ if (entrypoint_.IsValid()) {
+ arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blr(XRegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
// If the new reference is different from the old reference,
// update the field in the holder (`*(obj_ + field_offset_)`).
@@ -896,6 +908,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
const Register temp_;
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
};
@@ -5614,14 +5629,35 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+ // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Register temp = lr;
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, root, /* entrypoint */ LocationFrom(temp));
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp, MemOperand(tr, entry_point_offset));
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
@@ -5636,20 +5672,6 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- Register temp = lr;
-
- // Slow path marking the GC root `root`. The entrypoint will alrady be loaded in temp.
- SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction,
- root,
- LocationFrom(temp));
- codegen_->AddSlowPath(slow_path);
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp, MemOperand(tr, entry_point_offset));
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ Cbnz(temp, slow_path->GetEntryLabel());
@@ -5751,54 +5773,77 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- MacroAssembler* masm = GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
-
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Register temp2 = lr;
+ Location temp2_loc = LocationFrom(temp2);
+ SlowPathCodeARM64* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
+ // address of the form `obj + field_offset`, where `obj` is a
+ // register and `field_offset` is a register. Thus `offset` and
+ // `scale_factor` above are expected to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u); /* "times 1" */
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
+ instruction, ref, obj, /* field_offset */ index, temp, /* entrypoint */ temp2_loc);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, ref, /* entrypoint */ temp2_loc);
+ }
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+ // The reference load.
+ GenerateRawReferenceLoad(
+ instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp2, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(obj.IsW());
Primitive::Type type = Primitive::kPrimNot;
Register ref_reg = RegisterFrom(ref, type);
- DCHECK(obj.IsW());
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
-
- {
- // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- // /* int32_t */ monitor = obj->monitor_
- __ Ldr(temp, HeapOperand(obj, monitor_offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
+ // If needed, vixl::EmissionCheckScope guards are used to ensure
+ // that no pools are emitted between the load (macro) instruction
+ // and MaybeRecordImplicitNullCheck.
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -5813,59 +5858,50 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
<< instruction->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, 0u);
- DCHECK_EQ(needs_null_check, 0u);
- // /* HeapReference<Object> */ ref = *(obj + index)
+ DCHECK_EQ(needs_null_check, false);
+ // /* HeapReference<mirror::Object> */ ref = *(obj + index)
MemOperand field = HeapOperand(obj, XRegisterFrom(index));
LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
} else {
- // ArrayGet and UnsafeGetObject intrinsics cases.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
Load(type, ref_reg, HeapOperand(obj, computed_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
} else {
- Register temp3 = temps.AcquireW();
- __ Add(temp3, obj, offset);
- Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
- temps.Release(temp3);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireW();
+ __ Add(temp, obj, offset);
+ {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
}
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
MemOperand field = HeapOperand(obj, offset);
if (use_load_acquire) {
- LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+ // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
+ LoadAcquire(instruction, ref_reg, field, needs_null_check);
} else {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
Load(type, ref_reg, field);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
}
}
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM64* slow_path;
- if (always_update_field) {
- // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
- // address of the form `obj + field_offset`, where `obj` is a
- // register and `field_offset` is a register. Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
- instruction, ref, obj, /* field_offset */ index, temp);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
- }
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
}
void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
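
The ARM64 GenerateRawReferenceLoad wraps each load together with MaybeRecordImplicitNullCheck in an EmissionCheckScope, so that no literal or veneer pool can be emitted between the faulting load and the recorded PC. A rough standalone sketch of that pattern with the VIXL AArch64 macro assembler follows; EmitGuardedLoad and the 15-instruction margin are illustrative assumptions, not code taken from the patch.

  #include <cstddef>

  #include "aarch64/macro-assembler-aarch64.h"  // VIXL

  using namespace vixl::aarch64;

  // Approximate margin reserved for one macro-instruction expansion
  // (ART defines its own kMaxMacroInstructionSizeInBytes for this purpose).
  constexpr size_t kGuardedRegionSize = 15 * kInstructionSize;

  // Keep the load and the point where an implicit null check would be
  // recorded inside one pool-free region, so the recorded PC is the load's.
  void EmitGuardedLoad(MacroAssembler* masm,
                       const Register& dst,
                       const MemOperand& src) {
    vixl::EmissionCheckScope guard(masm, kGuardedRegionSize);
    masm->Ldr(dst, src);
    // In the patch, MaybeRecordImplicitNullCheck(instruction) is called at
    // this point, while the scope is still open.
  }
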
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5faf29a90f..231fb057c8 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -616,8 +616,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
Location index,
vixl::aarch64::Register temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
//
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
@@ -636,6 +636,16 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool use_load_acquire,
bool always_update_field = false);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 6bfbe4a9c9..c92a056f32 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -668,6 +668,10 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
@@ -732,6 +736,7 @@ class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
__ Blx(RegisterFrom(entrypoint_));
} else {
+ // Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
// This runtime call does not require a stack map.
@@ -760,6 +765,10 @@ class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
@@ -767,13 +776,15 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
vixl32::Register obj,
Location field_offset,
vixl32::Register temp1,
- vixl32::Register temp2)
+ vixl32::Register temp2,
+ Location entrypoint = Location::NoLocation())
: SlowPathCodeARMVIXL(instruction),
ref_(ref),
obj_(obj),
field_offset_(field_offset),
temp1_(temp1),
- temp2_(temp2) {
+ temp2_(temp2),
+ entrypoint_(entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -828,10 +839,16 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
//
// rX <- ReadBarrierMarkRegX(rX)
//
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ Blx(RegisterFrom(entrypoint_));
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
// If the new reference is different from the old reference,
// update the field in the holder (`*(obj_ + field_offset_)`).
@@ -928,6 +945,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL
const vixl32::Register temp1_;
const vixl32::Register temp2_;
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
};
@@ -7263,14 +7283,35 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
- // Baker's read barrier are used:
+ // Baker's read barriers are used.
+ //
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
//
- // root = obj.field;
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
// }
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -7281,21 +7322,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
@@ -7366,55 +7392,92 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // In slow path based read barriers, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // After loading the reference from `obj.field` into `ref`, query
+ // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
+ // need to enter the slow path to mark the reference. This
+ // optimistic strategy (we expect the GC to not be marking most of
+ // the time) does not check `obj`'s lock word (to see if it is a
+ // gray object or not), so may sometimes mark an already marked
+ // object.
//
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
- // }
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp3` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp3` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
//
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
- vixl32::Register ref_reg = RegisterFrom(ref);
+ // TODO: This temp register is only necessary when
+ // `always_update_field` is true; make it optional (like `temp2`).
vixl32::Register temp_reg = RegisterFrom(temp);
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp3`.
+ Location temp3 = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, ref, /* entrypoint */ temp3);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
+ AddSlowPath(slow_path);
+
+ // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
+ // The reference load.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check) {
+ Primitive::Type type = Primitive::kPrimNot;
+ vixl32::Register ref_reg = RegisterFrom(ref, type);
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp_reg`.
- __ Add(obj, obj, Operand(temp_reg, ShiftType::LSR, 32));
+ // If needed, vixl::EmissionCheckScope guards are used to ensure
+ // that no pools are emitted between the load (macro) instruction
+ // and MaybeRecordImplicitNullCheck.
- // The actual reference load.
if (index.IsValid()) {
// Load types involving an "index": ArrayGet,
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
// intrinsics.
- // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
(Int32ConstantFrom(index) << scale_factor) + offset;
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
} else {
// Handle the special case of the
// UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -7424,46 +7487,27 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
? LowRegisterFrom(index)
: RegisterFrom(index);
UseScratchRegisterScope temps(GetVIXLAssembler());
- const vixl32::Register temp3 = temps.Acquire();
- __ Add(temp3, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp3, offset);
+ vixl32::Register temp = temps.Acquire();
+ __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
+ {
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
}
} else {
- // /* HeapReference<Object> */ ref = *(obj + offset)
+ // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
}
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
- // Slow path marking the object `ref` when it is gray.
- SlowPathCodeARMVIXL* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
- } else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, ref);
- }
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
- __ B(cs, slow_path->GetEntryLabel()); // Carry flag is the last bit shifted out by LSRS.
- __ Bind(slow_path->GetExitLabel());
}
void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
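
Across all three backends, the mark-and-update slow paths gain the same plumbing: an optional `entrypoint` location (defaulting to Location::NoLocation()) that, when valid, is branched to directly instead of reloading the per-register entrypoint from the Thread. A small, self-contained C++ model of that dispatch is sketched below; MarkFn and MarkSlowPathModel are hypothetical names, not ART classes.

  #include <optional>

  using Ref = void*;
  using MarkFn = Ref (*)(Ref);

  // Models the `entrypoint_.IsValid()` branch added to the slow paths:
  // call the pre-loaded entrypoint if the caller supplied one (blx/Blr in
  // the emitted code), otherwise fetch it from the current thread first
  // (InvokeRuntimeWithoutRecordingPcInfo in the emitted code).
  struct MarkSlowPathModel {
    explicit MarkSlowPathModel(std::optional<MarkFn> entrypoint = std::nullopt)
        : entrypoint_(entrypoint) {}

    Ref Mark(Ref ref, MarkFn (*load_entrypoint_from_thread)()) const {
      MarkFn fn = entrypoint_.has_value() ? *entrypoint_
                                          : load_entrypoint_from_thread();
      return fn(ref);
    }

    const std::optional<MarkFn> entrypoint_;  // Location::NoLocation() <=> nullopt
  };

A fast-path caller would construct it with the entrypoint it just loaded into LR, mirroring the /* entrypoint */ temp3 argument passed in the hunks above.
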
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3f52c72bd4..2a636dbd99 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -45,6 +45,11 @@ static constexpr bool kArmUseVIXL32 = true;
namespace art {
namespace arm {
+// This constant is used as an approximate margin when emission of veneer and literal pools
+// must be blocked.
+static constexpr int kMaxMacroInstructionSizeInBytes =
+ 15 * vixl::aarch32::kMaxInstructionSizeInBytes;
+
static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = {
vixl::aarch32::r1,
vixl::aarch32::r2,
@@ -625,6 +630,15 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
bool always_update_field = false,
vixl::aarch32::Register* temp2 = nullptr);
+ // Generate a heap reference load (with no read barrier).
+ void GenerateRawReferenceLoad(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check);
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index c262cf983d..751623c177 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1946,6 +1946,8 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
}
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// The base destination address is computed later, as `temp2` is
// used for intermediate computations.
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 86e54294ae..f38642242d 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2732,6 +2732,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
//
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 70a3d38c13..cc4889b26a 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2264,6 +2264,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
}
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
// The base destination address is computed later, as `temp2` is
// used for intermediate computations.