author     2017-06-30 18:34:01 +0100
committer  2017-07-13 16:41:07 +0100
commit     6d729a789d3d7771e13d9445ee0be1d9d48a81b5 (patch)
tree       360b9af68920f411be5fe6753aaf7ab4976385ea
parent     8cfbbb826a3ab7bb680cfcd8a8148570b165d620 (diff)
Introduce a Marking Register in ARM code generation.
When generating code for ARM, maintain the status of
Thread::Current()->GetIsGcMarking() in register R8,
dubbed MR (Marking Register), and check the value of that
register (instead of loading and checking a read barrier
marking entrypoint) in read barriers.
Test: m test-art-target
Test: m test-art-target with tree built with ART_USE_READ_BARRIER=false
Test: m test-art-host-gtest
Test: ARM device boot test
Bug: 37707231
Change-Id: I30b44254460d0bbb9f1b2adc65eca52ca3de3f53
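The core of the change is replacing a per-barrier memory load with a register test: before this commit, compiled code loaded a per-register mark entrypoint pointer from the thread structure and compared it against null (null meaning "not marking"); with MR, the fast path tests a value already held in R8, and only the slow path loads the entrypoint. The following standalone C++ sketch models that contrast. It is an illustration only, not ART code; all names in it (ThreadState, LoadReferenceOld, LoadReferenceNew) are invented for the example.

#include <cstdio>

struct Obj { Obj* field = nullptr; };

// Stand-in for the runtime's slow-path mark routine (identity here).
static Obj* ReadBarrierMark(Obj* ref) { return ref; }

struct ThreadState {
  // Old scheme: null when the GC is not marking, so "entrypoint != null"
  // doubles as the is-marking test, at the cost of one load per barrier.
  Obj* (*mark_entrypoint)(Obj*) = nullptr;
  // New scheme: the flag this commit keeps permanently in R8 ("MR").
  bool is_gc_marking = false;
};

// Before: load the entrypoint pointer from the thread, then test it.
Obj* LoadReferenceOld(ThreadState* self, Obj* holder) {
  Obj* ref = holder->field;                // Original reference load.
  if (self->mark_entrypoint != nullptr) {  // Memory load + compare.
    ref = self->mark_entrypoint(ref);      // Slow path.
  }
  return ref;
}

// After: test the register-cached flag; the slow path fetches the entrypoint.
Obj* LoadReferenceNew(bool mr, Obj* holder) {
  Obj* ref = holder->field;                // Original reference load.
  if (mr) {                                // Single register test, no load.
    ref = ReadBarrierMark(ref);            // Entrypoint loaded in slow path.
  }
  return ref;
}

int main() {
  Obj target;
  Obj holder{&target};
  ThreadState self{ReadBarrierMark, /* is_gc_marking= */ true};
  std::printf("old: %p, new: %p\n",
              static_cast<void*>(LoadReferenceOld(&self, &holder)),
              static_cast<void*>(LoadReferenceNew(self.is_gc_marking, &holder)));
  return 0;
}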
 compiler/jni/jni_cfi_test.cc                        |   5
 compiler/jni/jni_cfi_test_expected.inc              |  31
 compiler/linker/arm/relative_patcher_thumb2.cc      |  28
 compiler/optimizing/code_generator_arm_vixl.cc      | 239
 compiler/optimizing/code_generator_arm_vixl.h       |  10
 compiler/utils/arm/assembler_arm_vixl.cc            |   3
 compiler/utils/arm/assembler_arm_vixl.h             |   2
 compiler/utils/arm/jni_macro_assembler_arm_vixl.cc  |  16
 compiler/utils/arm64/jni_macro_assembler_arm64.cc   |   2
 compiler/utils/assembler_thumb_test.cc              |   4
 compiler/utils/assembler_thumb_test_expected.cc.inc |  12
 runtime/arch/arm/asm_support_arm.S                  |  11
 runtime/arch/arm/context_arm.cc                     |   2
 runtime/arch/arm/quick_entrypoints_arm.S            |  83
 runtime/arch/arm/registers_arm.h                    |   3
 runtime/common_runtime_test.h                       |   6
16 files changed, 261 insertions, 196 deletions
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 23106e5da1..b552a6e531 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -110,8 +110,13 @@ class JNICFITest : public CFITest { } #ifdef ART_ENABLE_CODEGEN_arm +// Run the tests for ARM only with Baker read barriers, as the +// expected generated code contains a Marking Register refresh +// instruction. +#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) TEST_ISA(kThumb2) #endif +#endif #ifdef ART_ENABLE_CODEGEN_arm64 // Run the tests for ARM64 only with Baker read barriers, as the diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index acb8a57bec..d641fe4251 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -1,7 +1,8 @@ static constexpr uint8_t expected_asm_kThumb2[] = { 0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90, 0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0, - 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D, + 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x4D, + 0xD9, 0xF8, 0x34, 0x80, 0x70, 0x47, }; static constexpr uint8_t expected_cfi_kThumb2[] = { 0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A, @@ -13,10 +14,10 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { 0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C, 0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06, 0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06, - 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44, + 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x4A, 0x0B, 0x0E, 0x80, 0x01, }; -// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr} +// 0x00000000: push {r5,r6,r7,r8,r10,r11,lr} // 0x00000004: .cfi_def_cfa_offset: 28 // 0x00000004: .cfi_offset: r5 at cfa-28 // 0x00000004: .cfi_offset: r6 at cfa-24 @@ -25,7 +26,7 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000004: .cfi_offset: r10 at cfa-12 // 0x00000004: .cfi_offset: r11 at cfa-8 // 0x00000004: .cfi_offset: r14 at cfa-4 -// 0x00000004: vpush.f32 {s16-s31} +// 0x00000004: vpush {s16-s31} // 0x00000008: .cfi_def_cfa_offset: 92 // 0x00000008: .cfi_offset_extended: r80 at cfa-92 // 0x00000008: .cfi_offset_extended: r81 at cfa-88 @@ -43,21 +44,21 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000008: .cfi_offset_extended: r93 at cfa-40 // 0x00000008: .cfi_offset_extended: r94 at cfa-36 // 0x00000008: .cfi_offset_extended: r95 at cfa-32 -// 0x00000008: sub sp, sp, #36 +// 0x00000008: sub sp, #36 // 0x0000000a: .cfi_def_cfa_offset: 128 -// 0x0000000a: str r0, [sp, #0] +// 0x0000000a: str r0, [sp] // 0x0000000c: str r1, [sp, #132] -// 0x0000000e: vstr.f32 s0, [sp, #136] +// 0x0000000e: vstr s0, [sp, #136] // 0x00000012: str r2, [sp, #140] // 0x00000014: str r3, [sp, #144] -// 0x00000016: sub sp, sp, #32 +// 0x00000016: sub sp, #32 // 0x00000018: .cfi_def_cfa_offset: 160 -// 0x00000018: add sp, sp, #32 +// 0x00000018: add sp, #32 // 0x0000001a: .cfi_def_cfa_offset: 128 // 0x0000001a: .cfi_remember_state -// 0x0000001a: add sp, sp, #36 +// 0x0000001a: add sp, #36 // 0x0000001c: .cfi_def_cfa_offset: 92 -// 0x0000001c: vpop.f32 {s16-s31} +// 0x0000001c: vpop {s16-s31} // 0x00000020: .cfi_def_cfa_offset: 28 // 0x00000020: .cfi_restore_extended: r80 // 0x00000020: .cfi_restore_extended: r81 @@ -75,9 +76,11 @@ static 
constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000020: .cfi_restore_extended: r93 // 0x00000020: .cfi_restore_extended: r94 // 0x00000020: .cfi_restore_extended: r95 -// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc} -// 0x00000024: .cfi_restore_state -// 0x00000024: .cfi_def_cfa_offset: 128 +// 0x00000020: pop {r5,r6,r7,r8,r10,r11,lr} +// 0x00000024: ldr r8, [tr, #52] ; is_gc_marking +// 0x00000028: bx lr +// 0x0000002a: .cfi_restore_state +// 0x0000002a: .cfi_def_cfa_offset: 128 static constexpr uint8_t expected_asm_kArm64[] = { 0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9, diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index aa5a9457b2..18d6b9ad03 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -199,6 +199,24 @@ static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, // Note: The fake dependency is unnecessary for the slow path. } +// Load the read barrier introspection entrypoint in register `entrypoint` +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm::ArmVIXLAssembler& assembler, + vixl::aarch32::Register entrypoint) { + using vixl::aarch32::MemOperand; + using vixl::aarch32::ip; + // Thread Register. + const vixl::aarch32::Register tr = vixl::aarch32::r9; + + // The register where the read barrier introspection entrypoint is loaded + // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4). + DCHECK_EQ(entrypoint.GetCode(), Thumb2RelativePatcher::kBakerCcEntrypointRegister); + // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data) { using namespace vixl::aarch32; // NOLINT(build/namespaces) @@ -233,6 +251,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + raw_ldr_offset; Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); if (width == BakerReadBarrierWidth::kWide) { MemOperand ldr_half_address(lr, ldr_offset + 2); __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". @@ -278,8 +297,10 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& MemOperand ldr_address(lr, ldr_offset + 2); __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", // i.e. Rm+32 because the scale in imm2 is 2. - Register ep_reg(kBakerCcEntrypointRegister); // Insert ip to the entrypoint address to create - __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register. + Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create + // a switch case target based on the index register. __ Mov(ip, base_reg); // Move the base register to ip0. __ Bx(ep_reg); // Jump to the entrypoint's array switch case. 
break; @@ -309,9 +330,10 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& " the highest bits and the 'forwarding address' state to have all bits set"); __ Cmp(ip, Operand(0xc0000000)); __ B(hs, &forwarding_address); + Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister // to art_quick_read_barrier_mark_introspection_gc_roots. - Register ep_reg(kBakerCcEntrypointRegister); int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 7334678f99..d7e0f51a4a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -740,7 +740,9 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { // `ref`. // // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { protected: ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint) @@ -813,9 +815,10 @@ class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { // another thread, or if another thread installed another object // reference (different from `ref`) in `obj.field`). // -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. +// Argument `entrypoint` must be a register location holding the read +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { public: ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, @@ -861,7 +864,9 @@ class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL // reference (different from `ref`) in `obj.field`). // // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. 
class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { public: LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction, @@ -872,7 +877,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS ScaleFactor scale_factor, bool needs_null_check, vixl32::Register temp, - Location entrypoint) + Location entrypoint = Location::NoLocation()) : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), obj_(obj), offset_(offset), @@ -1006,22 +1011,24 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). // -// // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp1, - vixl32::Register temp2, - Location entrypoint) + LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + HInstruction* instruction, + Location ref, + vixl32::Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + bool needs_null_check, + vixl32::Register temp1, + vixl32::Register temp2, + Location entrypoint = Location::NoLocation()) : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), obj_(obj), offset_(offset), @@ -2310,7 +2317,8 @@ static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codege } } -static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, + CodeGeneratorARMVIXL* codegen) { const Primitive::Type type = cond->GetLeft()->GetType(); DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; @@ -2576,6 +2584,11 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { blocked_core_registers_[LR] = true; blocked_core_registers_[PC] = true; + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Reserve marking register. + blocked_core_registers_[MR] = true; + } + // Reserve thread register. blocked_core_registers_[TR] = true; @@ -8531,20 +8544,17 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Baker's read barrier are used. if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (actually kBakerCcEntrypointRegister) the read - // barrier mark introspection entrypoint. If `temp` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. // // We use link-time generated thunks for the slow path. 
That thunk // checks the reference and jumps to the entrypoint if needed. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &return_address; // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { - // goto gc_root_thunk<root_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) // } // return_address: @@ -8555,18 +8565,10 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( root_reg.GetCode(), narrow); vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); - // entrypoint_reg = - // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); - - vixl::EmissionCheckScope guard(GetVIXLAssembler(), - 4 * vixl32::kMaxInstructionSizeInBytes); + vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(kBakerCcEntrypointRegister, Operand(0)); + __ cmp(mr, Operand(0)); // Currently the offset is always within range. If that changes, // we shall have to split the load the same way as for fields. DCHECK_LT(offset, kReferenceLoadMinFarOffset); @@ -8578,34 +8580,23 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. // } - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = LocationFrom(lr); + // Slow path marking the GC root `root`. The entrypoint will + // be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, root, /* entrypoint */ temp); + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. 
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); static_assert( @@ -8616,9 +8607,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } } else { @@ -8659,20 +8648,19 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i if (kBakerReadBarrierLinkTimeThunksEnableForFields && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (actually kBakerCcEntrypointRegister) the read - // barrier mark introspection entrypoint. If `temp` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &gray_return_address; - // if (temp != nullptr) { - // goto field_thunk<holder_reg, base_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit @@ -8701,19 +8689,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - // entrypoint_reg = - // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); - vixl::EmissionCheckScope guard( GetVIXLAssembler(), (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(kBakerCcEntrypointRegister, Operand(0)); + __ cmp(mr, Operand(0)); EmitPlaceholderBne(this, bne_label); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); @@ -8760,20 +8741,19 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i if (kBakerReadBarrierLinkTimeThunksEnableForArrays && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. 
Instead, we - // load into `temp` (actually kBakerCcEntrypointRegister) the read - // barrier mark introspection entrypoint. If `temp` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &gray_return_address; - // if (temp != nullptr) { - // goto field_thunk<holder_reg, base_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit @@ -8793,20 +8773,13 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - // entrypoint_reg = - // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); __ Add(data_reg, obj, Operand(data_offset)); - vixl::EmissionCheckScope guard( GetVIXLAssembler(), (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(kBakerCcEntrypointRegister, Operand(0)); + __ cmp(mr, Operand(0)); EmitPlaceholderBne(this, bne_label); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); @@ -8838,26 +8811,21 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to mark the reference. - // Then, in the slow path, check the gray bit in the lock word of - // the reference's holder (`obj`) to decide whether to mark `ref` or - // not. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp2` the read barrier mark entry point - // corresponding to register `ref`. If `temp2` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. - // - // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. 
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { - // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // } // } else { // HeapReference<mirror::Object> ref = *src; // Original reference load. @@ -8866,30 +8834,13 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio vixl32::Register temp_reg = RegisterFrom(temp); // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp2`. - Location temp2 = LocationFrom(lr); + // entrypoint will be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp2); + instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg); AddSlowPath(slow_path); - // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); __ Bind(slow_path->GetExitLabel()); @@ -8905,19 +8856,14 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to update the reference - // field within `obj`. Then, in the slow path, check the gray bit - // in the lock word of the reference's holder (`obj`) to decide - // whether to mark `ref` and update the field or not. - // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp3` the read barrier mark entry point - // corresponding to register `ref`. If `temp3` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to update the reference field within `obj`. Then, in the + // slow path, check the gray bit in the lock word of the reference's + // holder (`obj`) to decide whether to mark `ref` and update the + // field or not. // - // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. 
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -8925,7 +8871,8 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // old_ref = ref; - // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // compareAndSwapObject(obj, field_offset, old_ref, ref); // } // } @@ -8933,8 +8880,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction vixl32::Register temp_reg = RegisterFrom(temp); // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will already be loaded in `temp3`. - Location temp3 = LocationFrom(lr); + // when the GC is marking. The entrypoint will be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( instruction, @@ -8945,19 +8891,10 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction /* scale_factor */ ScaleFactor::TIMES_1, needs_null_check, temp_reg, - temp2, - /* entrypoint */ temp3); + temp2); AddSlowPath(slow_path); - // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index ad3283ad4f..55847237d9 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -80,12 +80,16 @@ static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0; static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5; -// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr. +// Callee saves core registers r5, r6, r7, r8 (except when emitting Baker +// read barriers, where it is used as Marking Register), r10, r11, and lr. static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union( vixl::aarch32::RegisterList(vixl::aarch32::r5, vixl::aarch32::r6, - vixl::aarch32::r7, - vixl::aarch32::r8), + vixl::aarch32::r7), + // Do not consider r8 as a callee-save register with Baker read barriers. + ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) + ? 
vixl::aarch32::RegisterList() + : vixl::aarch32::RegisterList(vixl::aarch32::r8)), vixl::aarch32::RegisterList(vixl::aarch32::r10, vixl::aarch32::r11, vixl::aarch32::lr)); diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index eb3f870432..af3b4474e3 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -37,7 +37,10 @@ namespace arm { #define ___ vixl_masm_. #endif +// Thread register definition. extern const vixl32::Register tr(TR); +// Marking register definition. +extern const vixl32::Register mr(MR); void ArmVIXLAssembler::FinalizeCode() { vixl_masm_.FinalizeCode(); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index e81e767575..66b22ea87c 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -241,6 +241,8 @@ class ArmVIXLAssembler FINAL : public Assembler { // Thread register declaration. extern const vixl32::Register tr; +// Marking register declaration. +extern const vixl32::Register mr; } // namespace arm } // namespace art diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index d07c047253..bebe64c2b9 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -120,8 +120,8 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, CHECK_ALIGNED(frame_size, kStackAlignment); cfi().RememberState(); - // Compute callee saves to pop and PC. - RegList core_spill_mask = 1 << PC; + // Compute callee saves to pop and LR. + RegList core_spill_mask = 1 << LR; uint32_t fp_spill_mask = 0; for (const ManagedRegister& reg : callee_save_regs) { if (reg.AsArm().IsCoreRegister()) { @@ -136,6 +136,7 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, CHECK_GT(frame_size, pop_values * kFramePointerSize); DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. + // Pop FP callee saves. if (fp_spill_mask != 0) { uint32_t first = CTZ(fp_spill_mask); // Check that list is contiguous. @@ -146,9 +147,18 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, cfi().RestoreMany(DWARFReg(s0), fp_spill_mask); } - // Pop callee saves and PC. + // Pop core callee saves and LR. ___ Pop(RegisterList(core_spill_mask)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Refresh Mark Register. + // TODO: Refresh MR only if suspend is taken. + ___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value())); + } + + // Return to LR. + ___ Bx(vixl32::lr); + // The CFI should be restored for any code that follows the exit block. cfi().RestoreState(); cfi().DefCFAOffset(frame_size); diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index c436fd902c..bab84bea4c 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -782,7 +782,7 @@ void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size, // Decrease frame size to start of callee saved regs. DecreaseFrameSize(frame_size); - // Pop callee saved and return to LR. + // Return to LR. ___ Ret(); // The CFI should be restored for any code that follows the exit block. 
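The RemoveFrame changes above explain why caching the flag in a register stays correct across calls: the ARM JNI epilogue now pops LR rather than PC, reloads MR from Thread::IsGcMarkingOffset, and only then returns with bx lr. Any callee may hit a suspend check, and while the thread is suspended the collector can start or stop marking, so a flag value cached in a register before the call is stale afterwards. This is presumably also why the TODO ("Refresh MR only if suspend is taken") leaves the reload unconditional for now: tracking whether a suspend actually occurred would likely cost more than the single load. A minimal standalone model of that invariant follows (plain C++, hypothetical names, no ART APIs).

#include <cassert>

struct Thread { bool is_gc_marking = false; };

// Stand-in for a callee containing a suspend check; while the caller is
// suspended, the GC may flip the thread-local marking flag.
void CalleeThatMaySuspend(Thread* self) { self->is_gc_marking = true; }

int main() {
  Thread self;
  bool mr = self.is_gc_marking;     // Cached in a register (R8/MR) on entry.
  CalleeThatMaySuspend(&self);      // The cached copy is now stale.
  assert(mr != self.is_gc_marking);
  mr = self.is_gc_marking;          // REFRESH_MARKING_REGISTER equivalent.
  assert(mr == self.is_gc_marking);
  return 0;
}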
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 4e9b619979..759ed38601 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1643,6 +1643,10 @@ void EmitAndCheck(JniAssemblerType* assembler, const char* testname) { #define __ assembler. TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { + // Run the test only with Baker read barriers, as the expected + // generated code contains a Marking Register refresh instruction. + TEST_DISABLED_WITHOUT_BAKER_READ_BARRIERS(); + const bool is_static = true; const bool is_synchronized = false; const bool is_critical_native = false; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index eaaf81518a..563d1351e4 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5595,7 +5595,7 @@ const char* const VixlJniHelpersResults[] = { " 1dc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", " 1e0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", " 1e4: f000 b802 b.w 1ec <VixlJniHelpers+0x1ec>\n", - " 1e8: f000 b818 b.w 21c <VixlJniHelpers+0x21c>\n", + " 1e8: f000 b81b b.w 222 <VixlJniHelpers+0x222>\n", " 1ec: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", " 1f0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", " 1f4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", @@ -5608,10 +5608,12 @@ const char* const VixlJniHelpersResults[] = { " 210: b008 add sp, #32\n", " 212: b009 add sp, #36 ; 0x24\n", " 214: ecbd 8a10 vpop {s16-s31}\n", - " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", - " 21c: 4660 mov r0, ip\n", - " 21e: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", - " 222: 47e0 blx ip\n", + " 218: e8bd 4de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, lr}\n", + " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", + " 220: 4770 bx lr\n", + " 222: 4660 mov r0, ip\n", + " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", + " 228: 47e0 blx ip\n", nullptr }; diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S index 9eca86232d..eeac743df2 100644 --- a/runtime/arch/arm/asm_support_arm.S +++ b/runtime/arch/arm/asm_support_arm.S @@ -26,6 +26,13 @@ // Register holding Thread::Current(). #define rSELF r9 +#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) +// Marking Register, holding Thread::Current()->GetIsGcMarking(). +// Only used with the Concurrent Copying (CC) garbage +// collector, with the Baker read barrier configuration. +#define rMR r8 +#endif + .syntax unified .arch armv7-a .thumb @@ -121,14 +128,14 @@ END \name .endm -// Macros to poison (negate) the reference for heap poisoning. +// Macro to poison (negate) the reference for heap poisoning. .macro POISON_HEAP_REF rRef #ifdef USE_HEAP_POISONING rsb \rRef, \rRef, #0 #endif // USE_HEAP_POISONING .endm -// Macros to unpoison (negate) the reference for heap poisoning. +// Macro to unpoison (negate) the reference for heap poisoning. .macro UNPOISON_HEAP_REF rRef #ifdef USE_HEAP_POISONING rsb \rRef, \rRef, #0 diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 0db14fb8a5..711452cffb 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -108,7 +108,9 @@ void ArmContext::DoLongJump() { for (size_t i = 0; i < kNumberOfSRegisters; ++i) { fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : ArmContext::kBadFprBase + i; } + // Ensure the Thread Register contains the address of the current thread. 
DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]); + // The Marking Register will be updated by art_quick_do_long_jump. art_quick_do_long_jump(gprs, fprs); } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index b909bda3f7..b4002f09de 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -67,6 +67,9 @@ * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). */ .macro SETUP_SAVE_REFS_ONLY_FRAME rTemp + // Note: We could avoid saving R8 in the case of Baker read + // barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. push {r5-r8, r10-r11, lr} @ 7 words of callee saves .cfi_adjust_cfa_offset 28 .cfi_rel_offset r5, 0 @@ -93,6 +96,9 @@ .macro RESTORE_SAVE_REFS_ONLY_FRAME add sp, #4 @ bottom word holds Method* .cfi_adjust_cfa_offset -4 + // Note: Likewise, we could avoid restoring R8 in the case of Baker + // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. pop {r5-r8, r10-r11, lr} @ 7 words of callee saves .cfi_restore r5 .cfi_restore r6 @@ -104,16 +110,14 @@ .cfi_adjust_cfa_offset -28 .endm -.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN - RESTORE_SAVE_REFS_ONLY_FRAME - bx lr @ return -.endm - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). */ .macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY + // Note: We could avoid saving R8 in the case of Baker read + // barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args. .cfi_adjust_cfa_offset 40 .cfi_rel_offset r1, 0 @@ -156,6 +160,9 @@ .cfi_adjust_cfa_offset -8 vpop {s0-s15} .cfi_adjust_cfa_offset -64 + // Note: Likewise, we could avoid restoring X20 in the case of Baker + // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. pop {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves .cfi_restore r1 .cfi_restore r2 @@ -263,6 +270,17 @@ .cfi_adjust_cfa_offset -52 .endm +// Macro to refresh the Marking Register (R8). +// +// This macro must be called at the end of functions implementing +// entrypoints that possibly (directly or indirectly) perform a +// suspend check (before they return). 
+.macro REFRESH_MARKING_REGISTER +#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) + ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] +#endif +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz r0, 1f @ result non-zero branch over bx lr @ return @@ -359,6 +377,7 @@ ENTRY \name mov r1, r9 @ pass Thread::Current bl \entrypoint @ (uint32_t field_idx, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -370,6 +389,7 @@ ENTRY \name mov r2, r9 @ pass Thread::Current bl \entrypoint @ (field_idx, Object*, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -381,6 +401,7 @@ ENTRY \name mov r3, r9 @ pass Thread::Current bl \entrypoint @ (field_idx, Object*, new_val, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here + REFRESH_MARKING_REGISTER \return END \name .endm @@ -464,6 +485,8 @@ NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFr * * On success this wrapper will restore arguments and *jump* to the target, leaving the lr * pointing back to the original caller. + * + * Clobbers IP (R12). */ .macro INVOKE_TRAMPOLINE_BODY cxx_name .extern \cxx_name @@ -473,6 +496,7 @@ NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFr bl \cxx_name @ (method_idx, this, Thread*, SP) mov r12, r1 @ save Method*->code_ RESTORE_SAVE_REFS_AND_ARGS_FRAME + REFRESH_MARKING_REGISTER cbz r0, 1f @ did we find the target? if not go to exception delivery bx r12 @ tail call to target 1: @@ -549,6 +573,8 @@ ENTRY art_quick_invoke_stub_internal mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval #endif + REFRESH_MARKING_REGISTER + ldr ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] @ get pointer to the code blx ip @ call the method @@ -580,7 +606,8 @@ ENTRY art_quick_osr_stub mov r11, sp @ Save the stack pointer mov r10, r1 @ Save size of stack ldr r9, [r11, #40] @ Move managed thread pointer into r9 - mov r8, r2 @ Save the pc to call + REFRESH_MARKING_REGISTER + mov r6, r2 @ Save the pc to call sub r7, sp, #12 @ Reserve space for stack pointer, @ JValue* result, and ArtMethod* slot. 
and r7, #0xFFFFFFF0 @ Align stack pointer @@ -612,7 +639,7 @@ ENTRY art_quick_osr_stub .Losr_entry: sub r10, r10, #4 str lr, [sp, r10] @ Store link register per the compiler ABI - bx r8 + bx r6 END art_quick_osr_stub /* @@ -624,6 +651,7 @@ ARM_ENTRY art_quick_do_long_jump ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ + REFRESH_MARKING_REGISTER ldr r0, [r0, #-12] @ load r0 value mov r1, #0 @ clear result register r1 bx r2 @ do long jump @@ -677,6 +705,7 @@ ENTRY art_quick_lock_object mov r1, r9 @ pass Thread::Current bl artLockObjectFromCode @ (Object* obj, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_lock_object @@ -686,6 +715,7 @@ ENTRY art_quick_lock_object_no_inline mov r1, r9 @ pass Thread::Current bl artLockObjectFromCode @ (Object* obj, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_lock_object_no_inline @@ -743,6 +773,7 @@ ENTRY art_quick_unlock_object mov r1, r9 @ pass Thread::Current bl artUnlockObjectFromCode @ (Object* obj, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_unlock_object @@ -753,6 +784,7 @@ ENTRY art_quick_unlock_object_no_inline mov r1, r9 @ pass Thread::Current bl artUnlockObjectFromCode @ (Object* obj, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_unlock_object_no_inline @@ -921,6 +953,7 @@ ENTRY \name mov r1, r9 @ pass Thread::Current bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -933,6 +966,7 @@ ENTRY \name mov r2, r9 @ pass Thread::Current bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -946,6 +980,7 @@ ENTRY \name @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*) bl \entrypoint RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -961,6 +996,7 @@ ENTRY \name add sp, #16 @ strip the extra frame .cfi_adjust_cfa_offset -16 RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER \return END \name .endm @@ -975,6 +1011,7 @@ ENTRY \name cbz r0, 1f @ If result is null, deliver the OOME. .cfi_remember_state RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 + REFRESH_MARKING_REGISTER bx lr .cfi_restore_state 1: @@ -987,6 +1024,9 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFro ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode +// Note: Functions `art{Get,Set}<Kind>{Static,Instance>FromCompiledCode` are +// defined by macros in runtime/entrypoints/quick/quick_field_entrypoints.cc. + /* * Called by managed code to resolve a static field and load a non-wide value. 
*/ @@ -1006,6 +1046,7 @@ ENTRY art_quick_get64_static bl artGet64StaticFromCompiledCode @ (uint32_t field_idx, Thread*) ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER cbnz r2, 1f @ success if no exception pending bx lr @ return on success 1: @@ -1031,6 +1072,7 @@ ENTRY art_quick_get64_instance bl artGet64InstanceFromCompiledCode @ (field_idx, Object*, Thread*) ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER cbnz r2, 1f @ success if no exception pending bx lr @ return on success 1: @@ -1066,6 +1108,7 @@ ENTRY art_quick_set64_instance add sp, #16 @ release out args .cfi_adjust_cfa_offset -16 RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_set64_instance @@ -1080,6 +1123,7 @@ ENTRY art_quick_set64_static add sp, #16 @ release out args .cfi_adjust_cfa_offset -16 RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_ZERO DELIVER_PENDING_EXCEPTION END art_quick_set64_static @@ -1223,6 +1267,7 @@ ENTRY \c_name mov r1, r9 @ pass Thread::Current bl \cxx_name @ (mirror::Class* cls, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END \c_name .endm @@ -1315,6 +1360,7 @@ ENTRY \name mov r1, r9 // Pass Thread::Current. bl \entrypoint // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END \name .endm @@ -1386,6 +1432,7 @@ ENTRY \name mov r2, r9 // pass Thread::Current bl \entrypoint RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END \name .endm @@ -1462,8 +1509,8 @@ END \name add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) .endm -# TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove -# the entrypoint once all backends have been updated to use the size variants. +// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove +// the entrypoint once all backends have been updated to use the size variants. 
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16 @@ -1492,6 +1539,7 @@ ENTRY art_quick_test_suspend mov r0, rSELF bl artTestSuspendFromCode @ (Thread*) RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER bx lr END art_quick_test_suspend @@ -1499,7 +1547,9 @@ ENTRY art_quick_implicit_suspend mov r0, rSELF SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves for stack crawl bl artTestSuspendFromCode @ (Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN + RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER + bx lr END art_quick_implicit_suspend /* @@ -1518,6 +1568,7 @@ ENTRY art_quick_proxy_invoke_handler add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER cbnz r2, 1f @ success if no exception is pending vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... bx lr @ return on success @@ -1567,8 +1618,9 @@ ENTRY art_quick_resolution_trampoline blx artQuickResolutionTrampoline @ (Method* called, receiver, Thread*, SP) cbz r0, 1f @ is code pointer null? goto exception mov r12, r0 - ldr r0, [sp, #0] @ load resolved method in r0 + ldr r0, [sp, #0] @ load resolved method in r0 RESTORE_SAVE_REFS_AND_ARGS_FRAME + REFRESH_MARKING_REGISTER bx r12 @ tail-call into actual code 1: RESTORE_SAVE_REFS_AND_ARGS_FRAME @@ -1649,6 +1701,7 @@ ENTRY art_quick_generic_jni_trampoline add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER // store into fpr, for when it's a fpr return... vmov d0, r0, r1 @@ -1675,6 +1728,7 @@ ENTRY art_quick_to_interpreter_bridge add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER cbnz r2, 1f @ success if no exception is pending vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... bx lr @ return on success @@ -1705,6 +1759,7 @@ ENTRY art_quick_instrumentation_entry mov r12, r0 @ r12 holds reference to code ldr r0, [sp, #4] @ restore r0 RESTORE_SAVE_REFS_AND_ARGS_FRAME + REFRESH_MARKING_REGISTER blx r12 @ call method with lr set to art_quick_instrumentation_exit @ Deliberate fall-through into art_quick_instrumentation_exit. .type art_quick_instrumentation_exit, #function @@ -1734,6 +1789,7 @@ art_quick_instrumentation_exit: .cfi_restore r0 .cfi_restore r1 RESTORE_SAVE_REFS_ONLY_FRAME + REFRESH_MARKING_REGISTER cbz r2, .Ldo_deliver_instrumentation_exception @ Deliver exception if we got nullptr as function. 
 bx r2 @ Otherwise, return
@@ -1787,7 +1843,7 @@ END art_quick_deoptimize_from_compiled_code
  */
     /* mul-long vAA, vBB, vCC */
 ENTRY art_quick_mul_long
-    push    {r9 - r10}
+    push    {r9-r10}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r9, 0
     .cfi_rel_offset r10, 4
@@ -1797,7 +1853,7 @@ ENTRY art_quick_mul_long
     add     r10, r2, r10               @ r10<- r10 + low(ZxW + (YxX))
     mov     r0,r9
     mov     r1,r10
-    pop     {r9 - r10}
+    pop     {r9-r10}
     .cfi_adjust_cfa_offset -8
     .cfi_restore r9
     .cfi_restore r10
@@ -2544,6 +2600,7 @@ ENTRY art_quick_invoke_polymorphic
     add     sp, #8
     .cfi_adjust_cfa_offset -8
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
 .macro HANDLER_TABLE_OFFSET handler_label
diff --git a/runtime/arch/arm/registers_arm.h b/runtime/arch/arm/registers_arm.h
index 932095d0c9..d39a2a274f 100644
--- a/runtime/arch/arm/registers_arm.h
+++ b/runtime/arch/arm/registers_arm.h
@@ -40,7 +40,8 @@ enum Register {
   R13 = 13,
   R14 = 14,
   R15 = 15,
-  TR = 9,   // thread register
+  MR = 8,   // ART Marking Register
+  TR = 9,   // ART Thread Register
   FP = 11,
   IP = 12,
   SP = 13,
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 5893573bdd..fcf3a31fbc 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -247,6 +247,12 @@ class CheckJniAbortCatcher {
     return; \
   }
+#define TEST_DISABLED_WITHOUT_BAKER_READ_BARRIERS() \
+  if (!kEmitCompilerReadBarrier || !kUseBakerReadBarrier) { \
+    printf("WARNING: TEST DISABLED FOR GC WITHOUT BAKER READ BARRIER\n"); \
+    return; \
+  }
+
 #define TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS() \
   if (!kHostStaticBuildEnabled) { \
     printf("WARNING: TEST DISABLED FOR NON-STATIC HOST BUILDS\n"); \
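
The common_runtime_test.h hunk above adds a guard used the same way as the existing TEST_DISABLED_* macros: call it first in a test body so the test becomes a no-op in non-Baker configurations, as done for VixlJniHelpers earlier in this change. Below is a self-contained sketch of the pattern; the constant definitions and the test function are stand-ins for illustration, not ART's own.

#include <stdio.h>

// Stand-ins for the ART build-time flags the macro reads.
constexpr bool kEmitCompilerReadBarrier = true;
constexpr bool kUseBakerReadBarrier = true;

// The guard macro as added by this commit.
#define TEST_DISABLED_WITHOUT_BAKER_READ_BARRIERS() \
  if (!kEmitCompilerReadBarrier || !kUseBakerReadBarrier) { \
    printf("WARNING: TEST DISABLED FOR GC WITHOUT BAKER READ BARRIER\n"); \
    return; \
  }

// Hypothetical test body, guarded like the VixlJniHelpers test above.
void MarkingRegisterTest() {
  TEST_DISABLED_WITHOUT_BAKER_READ_BARRIERS();
  printf("running: expected code contains the MR refresh instruction\n");
}

int main() {
  MarkingRegisterTest();
  return 0;
}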