Introduce a Marking Register in ARM64 code generation.
When generating code for ARM64, maintain the status of
Thread::Current()->GetIsGcMarking() in register X20,
dubbed MR (Marking Register), and check the value of that
register (instead of loading and checking a read barrier
marking entrypoint) in read barriers.
Test: m test-art-target
Test: m test-art-target with tree built with ART_USE_READ_BARRIER=false
Test: ARM64 device boot test
Bug: 37707231
Change-Id: Ibe9bc5c99a2176b0a0476e9e9ad7fcc9f745017b
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7bf43f7..73202b4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -404,17 +404,6 @@
// accessing the String's `value` field in String intrinsics.
static uint32_t GetArrayDataOffset(HArrayGet* array_get);
- // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
- template <PointerSize pointer_size>
- static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
- // The entry point list defines 30 ReadBarrierMarkRegX entry points.
- DCHECK_LT(reg, 30u);
- // The ReadBarrierMarkRegX entry points are ordered by increasing
- // register number in Thread::tls_Ptr_.quick_entrypoints.
- return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
- + static_cast<size_t>(pointer_size) * reg;
- }
-
void EmitParallelMoves(Location from1,
Location to1,
Primitive::Type type1,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6b9f232..92467fe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -729,7 +729,7 @@
} else {
// Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
@@ -8428,7 +8428,7 @@
// Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(IP, 12);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
__ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
Label return_address;
@@ -8469,7 +8469,7 @@
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
@@ -8572,7 +8572,7 @@
// Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(IP, 12);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
__ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
Label return_address;
@@ -8655,7 +8655,7 @@
// Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(IP, 12);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
__ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
__ AddConstant(data_reg, obj, data_offset);
@@ -8736,7 +8736,7 @@
// temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
@@ -8805,7 +8805,7 @@
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2561ed0..7e5b1a0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -672,7 +672,9 @@
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
protected:
ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
@@ -716,7 +718,7 @@
} else {
// Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
// This runtime call does not require a stack map.
arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
@@ -743,9 +745,10 @@
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
public:
ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
@@ -791,7 +794,9 @@
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
public:
LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
@@ -803,7 +808,7 @@
bool needs_null_check,
bool use_load_acquire,
Register temp,
- Location entrypoint)
+ Location entrypoint = Location::NoLocation())
: ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
obj_(obj),
offset_(offset),
@@ -947,20 +952,23 @@
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
: public ReadBarrierMarkSlowPathBaseARM64 {
public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint)
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ size_t scale_factor,
+ bool needs_null_check,
+ bool use_load_acquire,
+ Register temp,
+ Location entrypoint = Location::NoLocation())
: ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
obj_(obj),
offset_(offset),
@@ -1655,7 +1663,7 @@
// Blocked core registers:
// lr : Runtime reserved.
// tr : Runtime reserved.
- // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it.
+ // mr : Runtime reserved.
// ip1 : VIXL core temp.
// ip0 : VIXL core temp.
//
@@ -5921,20 +5929,17 @@
// Baker's read barrier are used.
if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` (actually IP1) the read barrier mark introspection
- // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
- // false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
//
// We use link-time generated thunks for the slow path. That thunk
// checks the reference and jumps to the entrypoint if needed.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &return_address;
// GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) {
- // goto gc_root_thunk<root_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
// }
// return_address:
@@ -5946,11 +5951,6 @@
linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
- // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip0.GetCode(), 16u);
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
- __ Ldr(ip1, MemOperand(tr, entry_point_offset));
EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
vixl::aarch64::Label return_address;
__ adr(lr, &return_address);
@@ -5961,36 +5961,26 @@
"GC root LDR must be 2 instruction (8B) before the return address label.");
__ ldr(root_reg, MemOperand(obj.X(), offset));
__ Bind(cbnz_label);
- __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
__ Bind(&return_address);
} else {
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
//
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
// GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
// }
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Register temp = lr;
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
- instruction, root, /* entrypoint */ LocationFrom(temp));
+ // Slow path marking the GC root `root`. The entrypoint will
+ // be loaded by the slow path code.
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp, MemOperand(tr, entry_point_offset));
-
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
@@ -6005,9 +5995,7 @@
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Cbnz(mr, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
} else {
@@ -6048,20 +6036,19 @@
if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
!use_load_acquire &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded reference or not. Instead, we
- // load into `temp` (actually IP1) the read barrier mark introspection
- // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
- // false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &gray_return_address;
- // if (temp != nullptr) {
- // goto field_thunk<holder_reg, base_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto field_thunk<holder_reg, base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
@@ -6087,17 +6074,12 @@
obj.GetCode());
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
- // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip0.GetCode(), 16u);
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
- __ Ldr(ip1, MemOperand(tr, entry_point_offset));
EmissionCheckScope guard(GetVIXLAssembler(),
(kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
vixl::aarch64::Label return_address;
__ adr(lr, &return_address);
__ Bind(cbnz_label);
- __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
"Field LDR must be 1 instruction (4B) before the return address label; "
" 2 instructions (8B) for heap poisoning.");
@@ -6143,20 +6125,19 @@
if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded reference or not. Instead, we
- // load into `temp` (actually IP1) the read barrier mark introspection
- // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
- // false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &gray_return_address;
- // if (temp != nullptr) {
- // goto field_thunk<holder_reg, base_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
@@ -6176,18 +6157,13 @@
linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
- // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip0.GetCode(), 16u);
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
- __ Ldr(ip1, MemOperand(tr, entry_point_offset));
__ Add(temp.X(), obj.X(), Operand(data_offset));
EmissionCheckScope guard(GetVIXLAssembler(),
(kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
vixl::aarch64::Label return_address;
__ adr(lr, &return_address);
__ Bind(cbnz_label);
- __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
"Array LDR must be 1 instruction (4B) before the return address label; "
" 2 instructions (8B) for heap poisoning.");
@@ -6231,35 +6207,28 @@
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to mark the reference.
- // Then, in the slow path, check the gray bit in the lock word of
- // the reference's holder (`obj`) to decide whether to mark `ref` or
- // not.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
- // Note that we do not actually check the value of `GetIsGcMarking()`;
- // instead, we load into `temp2` the read barrier mark entry point
- // corresponding to register `ref`. If `temp2` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
- //
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
// HeapReference<mirror::Object> ref = *src; // Original reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
- // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// }
// } else {
// HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
// Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp2`.
- Register temp2 = lr;
- Location temp2_loc = LocationFrom(temp2);
+ // entrypoint will be loaded by the slow path code.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
instruction,
@@ -6270,19 +6239,10 @@
scale_factor,
needs_null_check,
use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
+ temp);
AddSlowPath(slow_path);
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp2, MemOperand(tr, entry_point_offset));
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Cbnz(temp2, slow_path->GetEntryLabel());
+ __ Cbnz(mr, slow_path->GetEntryLabel());
// Fast path: the GC is not marking: just load the reference.
GenerateRawReferenceLoad(
instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
@@ -6303,19 +6263,14 @@
// `instruction->IsArrayGet()` => `!use_load_acquire`.
DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to update the reference
- // field within `obj`. Then, in the slow path, check the gray bit
- // in the lock word of the reference's holder (`obj`) to decide
- // whether to mark `ref` and update the field or not.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to update the reference field within `obj`. Then, in the
+ // slow path, check the gray bit in the lock word of the reference's
+ // holder (`obj`) to decide whether to mark `ref` and update the
+ // field or not.
//
- // Note that we do not actually check the value of `GetIsGcMarking()`;
- // instead, we load into `temp2` the read barrier mark entry point
- // corresponding to register `ref`. If `temp2` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
- //
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -6323,15 +6278,14 @@
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
// old_ref = ref;
- // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
// }
// Slow path updating the object reference at address `obj + field_offset`
- // when the GC is marking. The entrypoint will already be loaded in `temp2`.
- Register temp2 = lr;
- Location temp2_loc = LocationFrom(temp2);
+ // when the GC is marking. The entrypoint will be loaded by the slow path code.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
instruction,
@@ -6342,19 +6296,10 @@
/* scale_factor */ 0u /* "times 1" */,
needs_null_check,
use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
+ temp);
AddSlowPath(slow_path);
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp2, MemOperand(tr, entry_point_offset));
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Cbnz(temp2, slow_path->GetEntryLabel());
+ __ Cbnz(mr, slow_path->GetEntryLabel());
// Fast path: the GC is not marking: nothing to do (the field is
// up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d9c49d1..584eead 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -70,21 +70,32 @@
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
-// Thread Register
+// Thread Register.
const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Marking Register.
+const vixl::aarch64::Register mr = vixl::aarch64::x20;
// Method register on invoke.
static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
vixl::aarch64::ip1);
const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
-const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers =
+ vixl::aarch64::CPURegList(
+ tr,
+ // Reserve X20 as Marking Register when emitting Baker read barriers.
+ ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
+ vixl::aarch64::lr);
-// Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
- vixl::aarch64::kXRegSize,
- vixl::aarch64::x20.GetCode(),
- vixl::aarch64::x30.GetCode());
+// Callee-save registers AAPCS64, without x19 (Thread Register) (nor
+// x20 (Marking Register) when emitting Baker read barriers).
+const vixl::aarch64::CPURegList callee_saved_core_registers(
+ vixl::aarch64::CPURegister::kRegister,
+ vixl::aarch64::kXRegSize,
+ ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
+ ? vixl::aarch64::x21.GetCode()
+ : vixl::aarch64::x20.GetCode()),
+ vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
vixl::aarch64::kDRegSize,
vixl::aarch64::d8.GetCode(),
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9a2402b..7334678 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -786,7 +786,7 @@
} else {
// Entrypoint is not already loaded, load from the thread.
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
}
@@ -8559,7 +8559,7 @@
// Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(ip.GetCode(), 12u);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
__ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
vixl::EmissionCheckScope guard(GetVIXLAssembler(),
@@ -8601,7 +8601,7 @@
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
@@ -8705,7 +8705,7 @@
// Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(ip.GetCode(), 12u);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
__ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
vixl::EmissionCheckScope guard(
@@ -8797,7 +8797,7 @@
// Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(ip.GetCode(), 12u);
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
__ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
__ Add(data_reg, obj, Operand(data_offset));
@@ -8883,7 +8883,7 @@
// temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
@@ -8951,7 +8951,7 @@
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index abe1d70..be8f9e9 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -656,7 +656,7 @@
__ NopIfNoReordering();
} else {
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
// This runtime call does not require a stack map.
mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
@@ -750,7 +750,7 @@
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
// This runtime call does not require a stack map.
mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
@@ -6497,7 +6497,7 @@
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 232241c..cf6b3d5 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -606,7 +606,7 @@
__ Nop();
} else {
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
// This runtime call does not require a stack map.
mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
@@ -699,7 +699,7 @@
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
// This runtime call does not require a stack map.
mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
@@ -4421,7 +4421,7 @@
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 79fccfe..af0e646 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -509,8 +509,7 @@
//
// rX <- ReadBarrierMarkRegX(rX)
//
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+ int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
@@ -595,8 +594,7 @@
//
// rX <- ReadBarrierMarkRegX(rX)
//
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+ int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
@@ -7153,7 +7151,7 @@
// Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
__ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
// The entrypoint is null when the GC is not marking.
__ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 57319ce..86f6d51 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -524,7 +524,7 @@
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+ Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
@@ -615,7 +615,7 @@
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+ Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
@@ -6540,7 +6540,7 @@
// Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
__ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
// The entrypoint is null when the GC is not marking.
__ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index ae5f8d1..3795866 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -154,8 +154,7 @@
DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
// TODO: Load the entrypoint once before the loop, instead of
// loading it at every iteration.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ MaybePoisonHeapReference(tmp);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 37d7981..aec1ec7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -205,7 +205,7 @@
// TODO: Load the entrypoint once before the loop, instead of
// loading it at every iteration.
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
// This runtime call does not require a stack map.
codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 3c9b613..ced931b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -226,7 +226,7 @@
// TODO: Load the entrypoint once before the loop, instead of
// loading it at every iteration.
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
assembler->MaybePoisonHeapReference(tmp);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 6b4851d..a18b0cc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -143,8 +143,7 @@
// explanations.)
DCHECK_NE(temp2, ESP);
DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+ int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
// This runtime call does not require a stack map.
x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ MaybePoisonHeapReference(temp2);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ef98b7b..5abdb1d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -105,8 +105,7 @@
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
// This runtime call does not require a stack map.
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ MaybePoisonHeapReference(CpuRegister(TMP));
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 60af2b4..abab431 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -31,21 +31,21 @@
// 0x00000010: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kArm64[] = {
- 0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
- 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
- 0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+ 0xFF, 0x03, 0x01, 0xD1, 0xF5, 0x17, 0x00, 0xF9, 0xF6, 0x7B, 0x03, 0xA9,
+ 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF5, 0x17, 0x40, 0xF9,
+ 0xF6, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
};
static constexpr uint8_t expected_cfi_kArm64[] = {
- 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
+ 0x44, 0x0E, 0x40, 0x44, 0x95, 0x06, 0x44, 0x96, 0x04, 0x9E, 0x02, 0x44,
0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
- 0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+ 0x44, 0xD5, 0x44, 0xD6, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
// 0x00000000: sub sp, sp, #0x40 (64)
// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: str x20, [sp, #40]
-// 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: stp x21, lr, [sp, #48]
-// 0x0000000c: .cfi_offset: r21 at cfa-16
+// 0x00000004: str x21, [sp, #40]
+// 0x00000008: .cfi_offset: r21 at cfa-24
+// 0x00000008: stp x22, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r22 at cfa-16
// 0x0000000c: .cfi_offset: r30 at cfa-8
// 0x0000000c: stp d8, d9, [sp, #24]
// 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -54,10 +54,10 @@
// 0x00000010: ldp d8, d9, [sp, #24]
// 0x00000014: .cfi_restore_extended: r72
// 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldr x20, [sp, #40]
-// 0x00000018: .cfi_restore: r20
-// 0x00000018: ldp x21, lr, [sp, #48]
-// 0x0000001c: .cfi_restore: r21
+// 0x00000014: ldr x21, [sp, #40]
+// 0x00000018: .cfi_restore: r21
+// 0x00000018: ldp x22, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r22
// 0x0000001c: .cfi_restore: r30
// 0x0000001c: add sp, sp, #0x40 (64)
// 0x00000020: .cfi_def_cfa_offset: 0