Diffstat (limited to 'compiler')
20 files changed, 199 insertions, 215 deletions
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index 2710ae9b53..acb8a57bec 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -89,7 +89,8 @@ static constexpr uint8_t expected_asm_kArm64[] = { 0xF3, 0x53, 0x46, 0xA9, 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, 0xF9, 0x6B, 0x49, 0xA9, 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, 0xE8, 0x27, 0x42, 0x6D, 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, - 0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, + 0xEE, 0x3F, 0x45, 0x6D, 0x74, 0x36, 0x40, 0xB9, 0xFF, 0x03, 0x03, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14, @@ -101,7 +102,7 @@ static constexpr uint8_t expected_cfi_kArm64[] = { 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, 0x44, 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0x06, 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, 0x06, - 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, + 0x4F, 0x48, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, }; // 0x00000000: sub sp, sp, #0xc0 (192) // 0x00000004: .cfi_def_cfa_offset: 192 @@ -175,11 +176,12 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x0000006c: ldp d14, d15, [sp, #80] // 0x00000070: .cfi_restore_extended: r78 // 0x00000070: .cfi_restore_extended: r79 -// 0x00000070: add sp, sp, #0xc0 (192) -// 0x00000074: .cfi_def_cfa_offset: 0 -// 0x00000074: ret -// 0x00000078: .cfi_restore_state -// 0x00000078: .cfi_def_cfa_offset: 192 +// 0x00000070: ldr w20, [tr, #52] ; is_gc_marking +// 0x00000074: add sp, sp, #0xc0 (192) +// 0x00000078: .cfi_def_cfa_offset: 0 +// 0x00000078: ret +// 0x0000007c: .cfi_restore_state +// 0x0000007c: .cfi_def_cfa_offset: 192 static constexpr uint8_t expected_asm_kX86[] = { 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index b34d9385c8..6ce7d75da6 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -49,6 +49,9 @@ extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar(JNIEnv*, jclass, jint return count + 1; } +// TODO: In the Baker read barrier configuration, add checks to ensure +// the Marking Register's value is correct. + namespace art { enum class JniKind { diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 33f4d77fc2..e086455620 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -108,11 +108,25 @@ static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(); // Calling convention ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() { - return Arm64ManagedRegister::FromXRegister(X20); // saved on entry restored on exit + // X20 is safe to use as a scratch register: + // - with Baker read barriers, it is reserved as Marking Register, + // and thus does not actually need to be saved/restored; it is + // refreshed on exit (see Arm64JNIMacroAssembler::RemoveFrame); + // - in other cases, it is saved on entry (in + // Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in + // Arm64JNIMacroAssembler::RemoveFrame). 
+ return Arm64ManagedRegister::FromXRegister(X20); } ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() { - return Arm64ManagedRegister::FromXRegister(X20); // saved on entry restored on exit + // X20 is safe to use as a scratch register: + // - with Baker read barriers, it is reserved as Marking Register, + // and thus does not actually need to be saved/restored; it is + // refreshed on exit (see Arm64JNIMacroAssembler::RemoveFrame); + // - in other cases, it is saved on entry (in + // Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in + // Arm64JNIMacroAssembler::RemoveFrame). + return Arm64ManagedRegister::FromXRegister(X20); } static ManagedRegister ReturnRegisterForShorty(const char* shorty) { diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index bc21607c5b..38c732b8ba 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -381,6 +381,21 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // Note: The fake dependency is unnecessary for the slow path. } +// Load the read barrier introspection entrypoint in register `entrypoint`. +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register entrypoint) { + using vixl::aarch64::MemOperand; + using vixl::aarch64::ip0; + // Thread Register. + const vixl::aarch64::Register tr = vixl::aarch64::x19; + + // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data) { using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -412,6 +427,7 @@ void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& a __ Bind(&slow_path); MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. @@ -441,6 +457,7 @@ void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& a __ Bind(&slow_path); MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create // a switch case target based on the index register. @@ -469,6 +486,7 @@ void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& a __ Bind(¬_marked); __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); __ B(&forwarding_address, mi); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to // art_quick_read_barrier_mark_introspection_gc_roots. 
__ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 7bf43f7971..73202b4fd1 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -404,17 +404,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // accessing the String's `value` field in String intrinsics. static uint32_t GetArrayDataOffset(HArrayGet* array_get); - // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`. - template <PointerSize pointer_size> - static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) { - // The entry point list defines 30 ReadBarrierMarkRegX entry points. - DCHECK_LT(reg, 30u); - // The ReadBarrierMarkRegX entry points are ordered by increasing - // register number in Thread::tls_Ptr_.quick_entrypoints. - return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value() - + static_cast<size_t>(pointer_size) * reg; - } - void EmitParallelMoves(Location from1, Location to1, Primitive::Type type1, diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 6b9f232e8f..92467fe101 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -729,7 +729,7 @@ class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM { } else { // Entrypoint is not already loaded, load from the thread. int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); } @@ -8428,7 +8428,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. DCHECK_EQ(IP, 12); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); Label return_address; @@ -8469,7 +8469,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); @@ -8572,7 +8572,7 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. DCHECK_EQ(IP, 12); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); Label return_address; @@ -8655,7 +8655,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. 
DCHECK_EQ(IP, 12); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); __ AddConstant(data_reg, obj, data_offset); @@ -8736,7 +8736,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset); @@ -8805,7 +8805,7 @@ void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* in // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2561ed0762..7e5b1a0fd1 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -672,7 +672,9 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { // `ref`. // // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { protected: ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint) @@ -716,7 +718,7 @@ class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { } else { // Entrypoint is not already loaded, load from the thread. int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); // This runtime call does not require a stack map. arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); } @@ -743,9 +745,10 @@ class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { // another thread, or if another thread installed another object // reference (different from `ref`) in `obj.field`). // -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. +// Argument `entrypoint` must be a register location holding the read +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. 
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { public: ReadBarrierMarkSlowPathARM64(HInstruction* instruction, @@ -791,7 +794,9 @@ class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { // reference (different from `ref`) in `obj.field`). // // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { public: LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction, @@ -803,7 +808,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo bool needs_null_check, bool use_load_acquire, Register temp, - Location entrypoint) + Location entrypoint = Location::NoLocation()) : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), obj_(obj), offset_(offset), @@ -947,20 +952,23 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo // another object reference (different from `ref`) in `obj.field`). // // Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. +// barrier marking runtime entry point to be invoked or an empty +// location; in the latter case, the read barrier marking runtime +// entry point will be loaded by the slow path code itself. class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint) + LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( + HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + size_t scale_factor, + bool needs_null_check, + bool use_load_acquire, + Register temp, + Location entrypoint = Location::NoLocation()) : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), obj_(obj), offset_(offset), @@ -1655,7 +1663,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { // Blocked core registers: // lr : Runtime reserved. // tr : Runtime reserved. - // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it. + // mr : Runtime reserved. // ip1 : VIXL core temp. // ip0 : VIXL core temp. // @@ -5921,20 +5929,17 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Baker's read barrier are used. if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (actually IP1) the read barrier mark introspection - // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is - // false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. // // We use link-time generated thunks for the slow path. 
That thunk // checks the reference and jumps to the entrypoint if needed. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &return_address; // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { - // goto gc_root_thunk<root_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) // } // return_address: @@ -5946,11 +5951,6 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); - // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(ip1, MemOperand(tr, entry_point_offset)); EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); @@ -5961,36 +5961,26 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( "GC root LDR must be 2 instruction (8B) before the return address label."); __ ldr(root_reg, MemOperand(obj.X(), offset)); __ Bind(cbnz_label); - __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. __ Bind(&return_address); } else { - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. // } - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Register temp = lr; - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( - instruction, root, /* entrypoint */ LocationFrom(temp)); + // Slow path marking the GC root `root`. The entrypoint will + // be loaded by the slow path code. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. 
- __ Ldr(temp, MemOperand(tr, entry_point_offset)); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); @@ -6005,9 +5995,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbnz(mr, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } } else { @@ -6048,20 +6036,19 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (actually IP1) the read barrier mark introspection - // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is - // false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &gray_return_address; - // if (temp != nullptr) { - // goto field_thunk<holder_reg, base_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit @@ -6087,17 +6074,12 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins obj.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(ip1, MemOperand(tr, entry_point_offset)); EmissionCheckScope guard(GetVIXLAssembler(), (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); __ Bind(cbnz_label); - __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), "Field LDR must be 1 instruction (4B) before the return address label; " " 2 instructions (8B) for heap poisoning."); @@ -6143,20 +6125,19 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins if (kBakerReadBarrierLinkTimeThunksEnableForArrays && !Runtime::Current()->UseJitCompilation()) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. 
Instead, we - // load into `temp` (actually IP1) the read barrier mark introspection - // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is - // false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // - // temp = Thread::Current()->pReadBarrierMarkIntrospection // lr = &gray_return_address; - // if (temp != nullptr) { - // goto field_thunk<holder_reg, base_reg>(lr) + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit @@ -6176,18 +6157,13 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(ip1, MemOperand(tr, entry_point_offset)); __ Add(temp.X(), obj.X(), Operand(data_offset)); EmissionCheckScope guard(GetVIXLAssembler(), (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); __ Bind(cbnz_label); - __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), "Array LDR must be 1 instruction (4B) before the return address label; " " 2 instructions (8B) for heap poisoning."); @@ -6231,35 +6207,28 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // `instruction->IsArrayGet()` => `!use_load_acquire`. DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to mark the reference. - // Then, in the slow path, check the gray bit in the lock word of - // the reference's holder (`obj`) to decide whether to mark `ref` or - // not. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp2` the read barrier mark entry point - // corresponding to register `ref`. If `temp2` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. - // - // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. 
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { - // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // } // } else { // HeapReference<mirror::Object> ref = *src; // Original reference load. // } // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp2`. - Register temp2 = lr; - Location temp2_loc = LocationFrom(temp2); + // entrypoint will be loaded by the slow path code. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( instruction, @@ -6270,19 +6239,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* scale_factor, needs_null_check, use_load_acquire, - temp, - /* entrypoint */ temp2_loc); + temp); AddSlowPath(slow_path); - // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ Ldr(temp2, MemOperand(tr, entry_point_offset)); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Cbnz(temp2, slow_path->GetEntryLabel()); + __ Cbnz(mr, slow_path->GetEntryLabel()); // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); @@ -6303,19 +6263,14 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* // `instruction->IsArrayGet()` => `!use_load_acquire`. DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to update the reference - // field within `obj`. Then, in the slow path, check the gray bit - // in the lock word of the reference's holder (`obj`) to decide - // whether to mark `ref` and update the field or not. - // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp2` the read barrier mark entry point - // corresponding to register `ref`. If `temp2` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to update the reference field within `obj`. Then, in the + // slow path, check the gray bit in the lock word of the reference's + // holder (`obj`) to decide whether to mark `ref` and update the + // field or not. // - // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // if (mr) { // Thread::Current()->GetIsGcMarking() // // Slow path. 
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -6323,15 +6278,14 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // old_ref = ref; - // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // compareAndSwapObject(obj, field_offset, old_ref, ref); // } // } // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will already be loaded in `temp2`. - Register temp2 = lr; - Location temp2_loc = LocationFrom(temp2); + // when the GC is marking. The entrypoint will be loaded by the slow path code. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( instruction, @@ -6342,19 +6296,10 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* /* scale_factor */ 0u /* "times 1" */, needs_null_check, use_load_acquire, - temp, - /* entrypoint */ temp2_loc); + temp); AddSlowPath(slow_path); - // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ Ldr(temp2, MemOperand(tr, entry_point_offset)); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Cbnz(temp2, slow_path->GetEntryLabel()); + __ Cbnz(mr, slow_path->GetEntryLabel()); // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index d9c49d19bb..584eead81b 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -70,21 +70,32 @@ static const vixl::aarch64::FPRegister kParameterFPRegisters[] = { }; static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); -// Thread Register +// Thread Register. const vixl::aarch64::Register tr = vixl::aarch64::x19; +// Marking Register. +const vixl::aarch64::Register mr = vixl::aarch64::x20; // Method register on invoke. static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0; const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0, vixl::aarch64::ip1); const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31); -const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr); - -// Callee-saved registers AAPCS64 (without x19 - Thread Register) -const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister, - vixl::aarch64::kXRegSize, - vixl::aarch64::x20.GetCode(), - vixl::aarch64::x30.GetCode()); +const vixl::aarch64::CPURegList runtime_reserved_core_registers = + vixl::aarch64::CPURegList( + tr, + // Reserve X20 as Marking Register when emitting Baker read barriers. 
+ ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg), + vixl::aarch64::lr); + +// Callee-save registers AAPCS64, without x19 (Thread Register) (nor +// x20 (Marking Register) when emitting Baker read barriers). +const vixl::aarch64::CPURegList callee_saved_core_registers( + vixl::aarch64::CPURegister::kRegister, + vixl::aarch64::kXRegSize, + ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) + ? vixl::aarch64::x21.GetCode() + : vixl::aarch64::x20.GetCode()), + vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister, vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 9a2402be04..7334678f99 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -786,7 +786,7 @@ class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { } else { // Entrypoint is not already loaded, load from the thread. int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); } @@ -8559,7 +8559,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. DCHECK_EQ(ip.GetCode(), 12u); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); vixl::EmissionCheckScope guard(GetVIXLAssembler(), @@ -8601,7 +8601,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); @@ -8705,7 +8705,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. DCHECK_EQ(ip.GetCode(), 12u); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); vixl::EmissionCheckScope guard( @@ -8797,7 +8797,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. 
DCHECK_EQ(ip.GetCode(), 12u); const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); __ Add(data_reg, obj, Operand(data_offset)); @@ -8883,7 +8883,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); @@ -8951,7 +8951,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index abe1d70216..be8f9e9cf8 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -656,7 +656,7 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { __ NopIfNoReordering(); } else { int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); // This runtime call does not require a stack map. mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, @@ -750,7 +750,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); // This runtime call does not require a stack map. mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, @@ -6497,7 +6497,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. 
__ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 232241c5ad..cf6b3d5805 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -606,7 +606,7 @@ class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Nop(); } else { int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); // This runtime call does not require a stack map. mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, @@ -699,7 +699,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); // This runtime call does not require a stack map. mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, @@ -4421,7 +4421,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 79fccfeaef..af0e6462a2 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -509,8 +509,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { // // rX <- ReadBarrierMarkRegX(rX) // - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); + int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); @@ -595,8 +594,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { // // rX <- ReadBarrierMarkRegX(rX) // - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); + int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); @@ -7153,7 +7151,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`). const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg()); __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0)); // The entrypoint is null when the GC is not marking. 
__ j(kNotEqual, slow_path->GetEntryLabel()); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 57319ce735..86f6d51734 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -524,7 +524,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); + Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); @@ -615,7 +615,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); + Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); @@ -6540,7 +6540,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0)); // The entrypoint is null when the GC is not marking. __ j(kNotEqual, slow_path->GetEntryLabel()); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index ae5f8d1760..37958660e1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -154,8 +154,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp; // TODO: Load the entrypoint once before the loop, instead of // loading it at every iteration. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); + int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ MaybePoisonHeapReference(tmp); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 37d79814be..aec1ec7669 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -205,7 +205,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { // TODO: Load the entrypoint once before the loop, instead of // loading it at every iteration. int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); // This runtime call does not require a stack map. 
codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 3c9b613803..ced931b36b 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -226,7 +226,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { // TODO: Load the entrypoint once before the loop, instead of // loading it at every iteration. int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); assembler->MaybePoisonHeapReference(tmp); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 6b4851d541..a18b0cc400 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -143,8 +143,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { // explanations.) DCHECK_NE(temp2, ESP); DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); + int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ MaybePoisonHeapReference(temp2); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ef98b7be30..5abdb1d1bd 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -105,8 +105,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP); + int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP); // This runtime call does not require a stack map. 
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ MaybePoisonHeapReference(CpuRegister(TMP)); diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 60af2b4201..abab431bb2 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -31,21 +31,21 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000010: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9, - 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9, - 0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, + 0xFF, 0x03, 0x01, 0xD1, 0xF5, 0x17, 0x00, 0xF9, 0xF6, 0x7B, 0x03, 0xA9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF5, 0x17, 0x40, 0xF9, + 0xF6, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44, + 0x44, 0x0E, 0x40, 0x44, 0x95, 0x06, 0x44, 0x96, 0x04, 0x9E, 0x02, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, - 0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, + 0x44, 0xD5, 0x44, 0xD6, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: sub sp, sp, #0x40 (64) // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: str x20, [sp, #40] -// 0x00000008: .cfi_offset: r20 at cfa-24 -// 0x00000008: stp x21, lr, [sp, #48] -// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x00000004: str x21, [sp, #40] +// 0x00000008: .cfi_offset: r21 at cfa-24 +// 0x00000008: stp x22, lr, [sp, #48] +// 0x0000000c: .cfi_offset: r22 at cfa-16 // 0x0000000c: .cfi_offset: r30 at cfa-8 // 0x0000000c: stp d8, d9, [sp, #24] // 0x00000010: .cfi_offset_extended: r72 at cfa-40 @@ -54,10 +54,10 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000010: ldp d8, d9, [sp, #24] // 0x00000014: .cfi_restore_extended: r72 // 0x00000014: .cfi_restore_extended: r73 -// 0x00000014: ldr x20, [sp, #40] -// 0x00000018: .cfi_restore: r20 -// 0x00000018: ldp x21, lr, [sp, #48] -// 0x0000001c: .cfi_restore: r21 +// 0x00000014: ldr x21, [sp, #40] +// 0x00000018: .cfi_restore: r21 +// 0x00000018: ldp x22, lr, [sp, #48] +// 0x0000001c: .cfi_restore: r22 // 0x0000001c: .cfi_restore: r30 // 0x0000001c: add sp, sp, #0x40 (64) // 0x00000020: .cfi_def_cfa_offset: 0 diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 9cd6884cbe..c436fd902c 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -772,6 +772,13 @@ void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size, asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size); asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Refresh Mark Register. + // TODO: Refresh MR only if suspend is taken. + ___ Ldr(reg_w(MR), + MemOperand(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value())); + } + // Decrease frame size to start of callee saved regs. DecreaseFrameSize(frame_size); |
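Note on Thread::ReadBarrierMarkEntryPointsOffset: the code generators and intrinsics above now call this Thread helper where they previously called CodeGenerator::GetReadBarrierMarkEntryPointsOffset, which the code_generator.h hunk removes. The Thread-side definition is not part of this diff; assuming it is essentially the removed helper moved onto Thread, it would look roughly like the sketch below (QUICK_ENTRYPOINT_OFFSET, pReadBarrierMarkReg00 and the explanatory comments are taken from the removed code; the rest is illustrative, not the actual runtime source):

// Sketch only -- presumed shape of the helper after its move to Thread;
// the real definition lives in the runtime, outside this diff.
template <PointerSize pointer_size>
static int32_t ReadBarrierMarkEntryPointsOffset(size_t reg) {
  // The entry point list defines 30 ReadBarrierMarkRegX entry points.
  DCHECK_LT(reg, 30u);
  // The ReadBarrierMarkRegX entry points are ordered by increasing
  // register number in Thread::tlsPtr_.quick_entrypoints.
  return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value() +
      static_cast<size_t>(pointer_size) * reg;
}

With the Marking Register in place, the ARM64 read barrier fast path no longer needs that offset at all: instead of loading the per-register entrypoint from the thread and testing it (Ldr ipX, [tr, #entry_point_offset]; Cbnz ipX, slow_path), the generated code tests mr (x20) directly (Cbnz mr, slow_path), and the slow path loads the entrypoint itself only when marking is actually in progress. JNI stubs keep mr current by reloading it from the thread's is_gc_marking field (Thread::IsGcMarkingOffset) when tearing down their frame, which is the new "ldr w20, [tr, #52] ; is_gc_marking" instruction visible in the updated jni_cfi_test_expected.inc output at the top of this diff.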