Introduce a Marking Register in ARM code generation.
When generating code for ARM, maintain the status of
Thread::Current()->GetIsGcMarking() in register R8,
dubbed MR (Marking Register), and check the value of that
register (instead of loading and checking a read barrier
marking entrypoint) in read barriers.
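
For example, with Baker read barriers the fast path of a reference
load previously loaded the per-register marking entrypoint and
compared it against null (null meaning GetIsGcMarking() is false):

  ldr ip, [tr, #<pReadBarrierMarkRegN>]  ; one extra load per barrier
  cmp ip, #0
  bne <slow path>

After this change the fast path only tests the Marking Register
(a sketch of the generated code shape, not verbatim compiler output;
the entrypoint offset is a placeholder):

  cmp r8, #0                             ; MR caches GetIsGcMarking()
  bne <slow path>

MR is refreshed from Thread::IsGcMarkingOffset wherever it may be
stale, e.g. in the JNI stub epilogue:

  ldr r8, [tr, #52]                      ; is_gc_marking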
Test: m test-art-target
Test: m test-art-target with tree built with ART_USE_READ_BARRIER=false
Test: m test-art-host-gtest
Test: ARM device boot test
Bug: 37707231
Change-Id: I30b44254460d0bbb9f1b2adc65eca52ca3de3f53
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 23106e5..b552a6e 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -110,8 +110,13 @@
}
#ifdef ART_ENABLE_CODEGEN_arm
+// Run the tests for ARM only with Baker read barriers, as the
+// expected generated code contains a Marking Register refresh
+// instruction.
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
TEST_ISA(kThumb2)
#endif
+#endif
#ifdef ART_ENABLE_CODEGEN_arm64
// Run the tests for ARM64 only with Baker read barriers, as the
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index acb8a57..d641fe4 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -1,7 +1,8 @@
static constexpr uint8_t expected_asm_kThumb2[] = {
0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0,
- 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+ 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x4D,
+ 0xD9, 0xF8, 0x34, 0x80, 0x70, 0x47,
};
static constexpr uint8_t expected_cfi_kThumb2[] = {
0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
@@ -13,10 +14,10 @@
0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
- 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
+ 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x4A,
0x0B, 0x0E, 0x80, 0x01,
};
// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr}
// 0x00000004: .cfi_def_cfa_offset: 28
// 0x00000004: .cfi_offset: r5 at cfa-28
// 0x00000004: .cfi_offset: r6 at cfa-24
@@ -25,7 +26,7 @@
// 0x00000004: .cfi_offset: r10 at cfa-12
// 0x00000004: .cfi_offset: r11 at cfa-8
// 0x00000004: .cfi_offset: r14 at cfa-4
// 0x00000004: vpush.f32 {s16-s31}
// 0x00000008: .cfi_def_cfa_offset: 92
// 0x00000008: .cfi_offset_extended: r80 at cfa-92
// 0x00000008: .cfi_offset_extended: r81 at cfa-88
@@ -43,21 +44,21 @@
// 0x00000008: .cfi_offset_extended: r93 at cfa-40
// 0x00000008: .cfi_offset_extended: r94 at cfa-36
// 0x00000008: .cfi_offset_extended: r95 at cfa-32
// 0x00000008: sub sp, sp, #36
// 0x0000000a: .cfi_def_cfa_offset: 128
// 0x0000000a: str r0, [sp, #0]
// 0x0000000c: str r1, [sp, #132]
// 0x0000000e: vstr.f32 s0, [sp, #136]
// 0x00000012: str r2, [sp, #140]
// 0x00000014: str r3, [sp, #144]
// 0x00000016: sub sp, sp, #32
// 0x00000018: .cfi_def_cfa_offset: 160
// 0x00000018: add sp, sp, #32
// 0x0000001a: .cfi_def_cfa_offset: 128
// 0x0000001a: .cfi_remember_state
// 0x0000001a: add sp, sp, #36
// 0x0000001c: .cfi_def_cfa_offset: 92
// 0x0000001c: vpop.f32 {s16-s31}
// 0x00000020: .cfi_def_cfa_offset: 28
// 0x00000020: .cfi_restore_extended: r80
// 0x00000020: .cfi_restore_extended: r81
@@ -75,9 +76,11 @@
// 0x00000020: .cfi_restore_extended: r93
// 0x00000020: .cfi_restore_extended: r94
// 0x00000020: .cfi_restore_extended: r95
-// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc}
-// 0x00000024: .cfi_restore_state
-// 0x00000024: .cfi_def_cfa_offset: 128
+// 0x00000020: pop {r5, r6, r7, r8, r10, r11, lr}
+// 0x00000024: ldr r8, [tr, #52] ; is_gc_marking
+// 0x00000028: bx lr
+// 0x0000002a: .cfi_restore_state
+// 0x0000002a: .cfi_def_cfa_offset: 128
static constexpr uint8_t expected_asm_kArm64[] = {
0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index aa5a945..18d6b9a 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -199,6 +199,24 @@
// Note: The fake dependency is unnecessary for the slow path.
}
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(arm::ArmVIXLAssembler& assembler,
+ vixl::aarch32::Register entrypoint) {
+ using vixl::aarch32::MemOperand;
+ using vixl::aarch32::ip;
+ // Thread Register.
+ const vixl::aarch32::Register tr = vixl::aarch32::r9;
+
+ // The register where the read barrier introspection entrypoint is loaded
+ // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4).
+ DCHECK_EQ(entrypoint.GetCode(), Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler,
uint32_t encoded_data) {
using namespace vixl::aarch32; // NOLINT(build/namespaces)
@@ -233,6 +251,7 @@
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
raw_ldr_offset;
Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
if (width == BakerReadBarrierWidth::kWide) {
MemOperand ldr_half_address(lr, ldr_offset + 2);
__ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
@@ -278,8 +297,10 @@
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
- Register ep_reg(kBakerCcEntrypointRegister); // Insert ip to the entrypoint address to create
- __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register.
+ Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
+ // a switch case target based on the index register.
__ Mov(ip, base_reg); // Move the base register to ip0.
__ Bx(ep_reg); // Jump to the entrypoint's array switch case.
break;
@@ -309,9 +330,10 @@
" the highest bits and the 'forwarding address' state to have all bits set");
__ Cmp(ip, Operand(0xc0000000));
__ B(hs, &forwarding_address);
+ Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
- Register ep_reg(kBakerCcEntrypointRegister);
int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
: BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7334678..d7e0f51 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -740,7 +740,9 @@
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
protected:
ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
@@ -813,9 +815,10 @@
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
@@ -861,7 +864,9 @@
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
@@ -872,7 +877,7 @@
ScaleFactor scale_factor,
bool needs_null_check,
vixl32::Register temp,
- Location entrypoint)
+ Location entrypoint = Location::NoLocation())
: ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
obj_(obj),
offset_(offset),
@@ -1006,22 +1011,24 @@
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-//
// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
: public ReadBarrierMarkSlowPathBaseARMVIXL {
public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint)
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+ HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ vixl32::Register temp1,
+ vixl32::Register temp2,
+ Location entrypoint = Location::NoLocation())
: ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
obj_(obj),
offset_(offset),
@@ -2310,7 +2317,8 @@
}
}
-static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
+ CodeGeneratorARMVIXL* codegen) {
const Primitive::Type type = cond->GetLeft()->GetType();
DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
@@ -2576,6 +2584,11 @@
blocked_core_registers_[LR] = true;
blocked_core_registers_[PC] = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Reserve marking register.
+ blocked_core_registers_[MR] = true;
+ }
+
// Reserve thread register.
blocked_core_registers_[TR] = true;
@@ -8531,20 +8544,17 @@
// Baker's read barriers are used.
if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` (actually kBakerCcEntrypointRegister) the read
- // barrier mark introspection entrypoint. If `temp` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
//
// We use link-time generated thunks for the slow path. That thunk
// checks the reference and jumps to the entrypoint if needed.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &return_address;
// GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) {
- // goto gc_root_thunk<root_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
// }
// return_address:
@@ -8555,18 +8565,10 @@
root_reg.GetCode(), narrow);
vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
- // entrypoint_reg =
- // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip.GetCode(), 12u);
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
- __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
-
- vixl::EmissionCheckScope guard(GetVIXLAssembler(),
- 4 * vixl32::kMaxInstructionSizeInBytes);
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ __ cmp(mr, Operand(0));
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
@@ -8578,34 +8580,23 @@
narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
: BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
//
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
// GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
// }
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
+ // Slow path marking the GC root `root`. The entrypoint will
+ // be loaded by the slow path code.
SlowPathCodeARMVIXL* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
-
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
static_assert(
@@ -8616,9 +8607,7 @@
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
} else {
@@ -8659,20 +8648,19 @@
if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded reference or not. Instead, we
- // load into `temp` (actually kBakerCcEntrypointRegister) the read
- // barrier mark introspection entrypoint. If `temp` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &gray_return_address;
- // if (temp != nullptr) {
- // goto field_thunk<holder_reg, base_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto field_thunk<holder_reg, base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
@@ -8701,19 +8689,12 @@
base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
- // entrypoint_reg =
- // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip.GetCode(), 12u);
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
- __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
-
vixl::EmissionCheckScope guard(
GetVIXLAssembler(),
(kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ __ cmp(mr, Operand(0));
EmitPlaceholderBne(this, bne_label);
ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
@@ -8760,20 +8741,19 @@
if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
!Runtime::Current()->UseJitCompilation()) {
- // Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded reference or not. Instead, we
- // load into `temp` (actually kBakerCcEntrypointRegister) the read
- // barrier mark introspection entrypoint. If `temp` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
- // temp = Thread::Current()->pReadBarrierMarkIntrospection
// lr = &gray_return_address;
- // if (temp != nullptr) {
- // goto field_thunk<holder_reg, base_reg>(lr)
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
@@ -8793,20 +8773,13 @@
linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
- // entrypoint_reg =
- // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
- DCHECK_EQ(ip.GetCode(), 12u);
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
- __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
__ Add(data_reg, obj, Operand(data_offset));
-
vixl::EmissionCheckScope guard(
GetVIXLAssembler(),
(kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ __ cmp(mr, Operand(0));
EmitPlaceholderBne(this, bne_label);
ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
@@ -8838,26 +8811,21 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to mark the reference.
- // Then, in the slow path, check the gray bit in the lock word of
- // the reference's holder (`obj`) to decide whether to mark `ref` or
- // not.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
//
- // Note that we do not actually check the value of `GetIsGcMarking()`;
- // instead, we load into `temp2` the read barrier mark entry point
- // corresponding to register `ref`. If `temp2` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
- //
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
// HeapReference<mirror::Object> ref = *src; // Original reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
- // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// }
// } else {
// HeapReference<mirror::Object> ref = *src; // Original reference load.
@@ -8866,30 +8834,13 @@
vixl32::Register temp_reg = RegisterFrom(temp);
// Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp2`.
- Location temp2 = LocationFrom(lr);
+ // entrypoint will be loaded by the slow path code.
SlowPathCodeARMVIXL* slow_path =
new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp2);
+ instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg);
AddSlowPath(slow_path);
- // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
// Fast path: the GC is not marking: just load the reference.
GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
@@ -8905,19 +8856,14 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // Query `art::Thread::Current()->GetIsGcMarking()` to decide
- // whether we need to enter the slow path to update the reference
- // field within `obj`. Then, in the slow path, check the gray bit
- // in the lock word of the reference's holder (`obj`) to decide
- // whether to mark `ref` and update the field or not.
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to update the reference field within `obj`. Then, in the
+ // slow path, check the gray bit in the lock word of the reference's
+ // holder (`obj`) to decide whether to mark `ref` and update the
+ // field or not.
//
- // Note that we do not actually check the value of `GetIsGcMarking()`;
- // instead, we load into `temp3` the read barrier mark entry point
- // corresponding to register `ref`. If `temp3` is null, it means
- // that `GetIsGcMarking()` is false, and vice versa.
- //
- // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -8925,7 +8871,8 @@
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
// old_ref = ref;
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
// compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
// }
@@ -8933,8 +8880,7 @@
vixl32::Register temp_reg = RegisterFrom(temp);
// Slow path updating the object reference at address `obj + field_offset`
- // when the GC is marking. The entrypoint will already be loaded in `temp3`.
- Location temp3 = LocationFrom(lr);
+ // when the GC is marking. The entrypoint will be loaded by the slow path code.
SlowPathCodeARMVIXL* slow_path =
new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
instruction,
@@ -8945,19 +8891,10 @@
/* scale_factor */ ScaleFactor::TIMES_1,
needs_null_check,
temp_reg,
- temp2,
- /* entrypoint */ temp3);
+ temp2);
AddSlowPath(slow_path);
- // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
// Fast path: the GC is not marking: nothing to do (the field is
// up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index ad3283a..5584723 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -80,12 +80,16 @@
static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5;
-// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr.
+// Callee saves core registers r5, r6, r7, r8 (except when emitting Baker
+// read barriers, where it is used as Marking Register), r10, r11, and lr.
static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union(
vixl::aarch32::RegisterList(vixl::aarch32::r5,
vixl::aarch32::r6,
- vixl::aarch32::r7,
- vixl::aarch32::r8),
+ vixl::aarch32::r7),
+ // Do not consider r8 as a callee-save register with Baker read barriers.
+ ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
+ ? vixl::aarch32::RegisterList()
+ : vixl::aarch32::RegisterList(vixl::aarch32::r8)),
vixl::aarch32::RegisterList(vixl::aarch32::r10,
vixl::aarch32::r11,
vixl::aarch32::lr));
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index eb3f870..af3b447 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -37,7 +37,10 @@
#define ___ vixl_masm_.
#endif
+// Thread register definition.
extern const vixl32::Register tr(TR);
+// Marking register definition.
+extern const vixl32::Register mr(MR);
void ArmVIXLAssembler::FinalizeCode() {
vixl_masm_.FinalizeCode();
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index e81e767..66b22ea 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -241,6 +241,8 @@
// Thread register declaration.
extern const vixl32::Register tr;
+// Marking register declaration.
+extern const vixl32::Register mr;
} // namespace arm
} // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index d07c047..bebe64c 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -120,8 +120,8 @@
CHECK_ALIGNED(frame_size, kStackAlignment);
cfi().RememberState();
- // Compute callee saves to pop and PC.
- RegList core_spill_mask = 1 << PC;
+ // Compute callee saves to pop and LR.
+ RegList core_spill_mask = 1 << LR;
uint32_t fp_spill_mask = 0;
for (const ManagedRegister& reg : callee_save_regs) {
if (reg.AsArm().IsCoreRegister()) {
@@ -136,6 +136,7 @@
CHECK_GT(frame_size, pop_values * kFramePointerSize);
DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well.
+ // Pop FP callee saves.
if (fp_spill_mask != 0) {
uint32_t first = CTZ(fp_spill_mask);
// Check that list is contiguous.
@@ -146,9 +147,18 @@
cfi().RestoreMany(DWARFReg(s0), fp_spill_mask);
}
- // Pop callee saves and PC.
+ // Pop core callee saves and LR.
___ Pop(RegisterList(core_spill_mask));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Refresh Marking Register.
+ // TODO: Refresh MR only if suspend is taken.
+ ___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+ }
+
+ // Return to LR.
+ ___ Bx(vixl32::lr);
+
// The CFI should be restored for any code that follows the exit block.
cfi().RestoreState();
cfi().DefCFAOffset(frame_size);
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index c436fd9..bab84be 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -782,7 +782,7 @@
// Decrease frame size to start of callee saved regs.
DecreaseFrameSize(frame_size);
- // Pop callee saved and return to LR.
+ // Return to LR.
___ Ret();
// The CFI should be restored for any code that follows the exit block.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 4e9b619..759ed38 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1643,6 +1643,10 @@
#define __ assembler.
TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) {
+ // Run the test only with Baker read barriers, as the expected
+ // generated code contains a Marking Register refresh instruction.
+ TEST_DISABLED_WITHOUT_BAKER_READ_BARRIERS();
+
const bool is_static = true;
const bool is_synchronized = false;
const bool is_critical_native = false;
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index eaaf815..563d135 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5595,7 +5595,7 @@
" 1dc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
" 1e0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
" 1e4: f000 b802 b.w 1ec <VixlJniHelpers+0x1ec>\n",
- " 1e8: f000 b818 b.w 21c <VixlJniHelpers+0x21c>\n",
+ " 1e8: f000 b81b b.w 222 <VixlJniHelpers+0x222>\n",
" 1ec: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
" 1f0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
" 1f4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
@@ -5608,10 +5608,12 @@
" 210: b008 add sp, #32\n",
" 212: b009 add sp, #36 ; 0x24\n",
" 214: ecbd 8a10 vpop {s16-s31}\n",
- " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
- " 21c: 4660 mov r0, ip\n",
- " 21e: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n",
- " 222: 47e0 blx ip\n",
+ " 218: e8bd 4de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, lr}\n",
+ " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n",
+ " 220: 4770 bx lr\n",
+ " 222: 4660 mov r0, ip\n",
+ " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n",
+ " 228: 47e0 blx ip\n",
nullptr
};