-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc      |  84
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h       |   9
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc          |   2
-rw-r--r--  dex2oat/linker/arm/relative_patcher_thumb2_test.cc  |  57
-rw-r--r--  runtime/arch/arm/asm_support_arm.h                  |   4
-rw-r--r--  runtime/arch/arm/quick_entrypoints_arm.S            | 195
-rw-r--r--  runtime/oat.h                                       |   4
7 files changed, 161 insertions(+), 194 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7350b146f9..58ce9aa9f0 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -107,16 +107,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
 // Marker that code is yet to be, and must, be implemented.
 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
 
-static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
-                                                         HInstruction* instruction) {
-  DCHECK(temps->IsAvailable(ip));
-  temps->Exclude(ip);
-  DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
-  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
-  DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
-      instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
-}
-
 static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
   ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
   __ bind(patch_label);
@@ -5973,8 +5963,6 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
     if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
       locations->AddTemp(Location::RequiresRegister());
     }
-    // And we always need the reserved entrypoint register.
-    locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
   } else {
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -6087,11 +6075,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
     case DataType::Type::kReference: {
      // /* HeapReference<Object> */ out = *(base + offset)
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp_loc = locations->GetTemp(0);
+        Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+            instruction, out, base, offset, maybe_temp, /* needs_null_check */ true);
        if (is_volatile) {
          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
        }
@@ -6390,8 +6378,6 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
                      object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
        !Runtime::Current()->UseJitCompilation() &&
        instruction->GetIndex()->IsConstant()) {
@@ -6404,16 +6390,10 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
      if (offset >= kReferenceLoadMinFarOffset) {
        locations->AddTemp(Location::RequiresRegister());
      }
-      // And we always need the reserved entrypoint register.
-      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
-    } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
-               !Runtime::Current()->UseJitCompilation() &&
-               !instruction->GetIndex()->IsConstant()) {
-      // We need a non-scratch temporary for the array data pointer.
-      locations->AddTemp(Location::RequiresRegister());
-      // And we always need the reserved entrypoint register.
-      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
    } else {
+      // If using introspection, we need a non-scratch temporary for the array data pointer.
+      // Otherwise, we need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
      locations->AddTemp(Location::RequiresRegister());
    }
  } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6526,20 +6506,22 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
      // /* HeapReference<Object> */ out =
      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp = locations->GetTemp(0);
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
        DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
        if (index.IsConstant()) {
          // Array load with a constant index can be treated as a field load.
+          Location maybe_temp =
+              (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
          data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
          codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                          out_loc,
                                                          obj,
                                                          data_offset,
-                                                          locations->GetTemp(0),
+                                                          maybe_temp,
                                                          /* needs_null_check */ false);
        } else {
+          Location temp = locations->GetTemp(0);
          codegen_->GenerateArrayLoadWithBakerReadBarrier(
              instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
        }
@@ -7447,13 +7429,6 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
      // For non-Baker read barrier we have a temp-clobbering call.
    }
  }
-  if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
-    if (load_kind == HLoadClass::LoadKind::kBssEntry ||
-        (load_kind == HLoadClass::LoadKind::kReferrersClass &&
-            !Runtime::Current()->UseJitCompilation())) {
-      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
-    }
-  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7687,9 +7662,6 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
      // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
      // that the the kPrimNot result register is the same as the first argument register.
      locations->SetCustomSlowPathCallerSaves(caller_saves);
-      if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
-        locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
-      }
    } else {
      // For non-Baker read barrier we have a temp-clobbering call.
    }
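A note on the `maybe_temp` pattern introduced above: with the reserved entrypoint register gone, the Baker read-barrier field/array paths may now carry zero temps, so the code guards GetTemp(0) behind GetTempCount() and passes an invalid default Location otherwise. A minimal self-contained sketch of that idea — the Location/LocationSummary types here are hypothetical stand-ins, not ART's real API:

    #include <cassert>
    #include <vector>

    // Hypothetical stand-ins; ART's real Location/LocationSummary differ.
    struct Location {
      bool valid = false;  // Default-constructed means "no location".
      int reg = -1;
      bool IsValid() const { return valid; }
    };

    struct LocationSummary {
      std::vector<Location> temps;
      size_t GetTempCount() const { return temps.size(); }
      Location GetTemp(size_t i) const { return temps.at(i); }
    };

    // Ask for the temp only if one was actually reserved; otherwise hand
    // back an invalid Location so the consumer falls back to scratch regs.
    Location MaybeTemp(const LocationSummary& locations) {
      return (locations.GetTempCount() != 0) ? locations.GetTemp(0) : Location();
    }

    int main() {
      LocationSummary no_temps;
      assert(!MaybeTemp(no_temps).IsValid());
      LocationSummary one_temp{{Location{true, 5}}};
      assert(MaybeTemp(one_temp).IsValid());
    }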
@@ -7866,9 +7838,6 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
  // Note that TypeCheckSlowPathARM uses this register too.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
-  }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -8829,7 +8798,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
      //     return_address:
 
      UseScratchRegisterScope temps(GetVIXLAssembler());
-      ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+      temps.Exclude(ip);
      bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
      uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
      vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -8897,16 +8866,6 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
  MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
-void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
-  DCHECK(kEmitCompilerReadBarrier);
-  DCHECK(kUseBakerReadBarrier);
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
-    if (!Runtime::Current()->UseJitCompilation()) {
-      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
-    }
-  }
-}
-
 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 vixl32::Register obj,
@@ -8944,7 +8903,6 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
    vixl32::Register base = obj;
    if (offset >= kReferenceLoadMinFarOffset) {
      base = RegisterFrom(temp);
-      DCHECK(!base.Is(kBakerCcEntrypointRegister));
      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
      offset &= (kReferenceLoadMinFarOffset - 1u);
@@ -8954,7 +8912,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
      DCHECK(!narrow);
    }
    UseScratchRegisterScope temps(GetVIXLAssembler());
-    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    temps.Exclude(ip);
    uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
@@ -9037,10 +8995,9 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
    vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
    vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
-    DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
 
    UseScratchRegisterScope temps(GetVIXLAssembler());
-    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    temps.Exclude(ip);
    uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
@@ -9927,16 +9884,16 @@ static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
 }
 
 // Load the read barrier introspection entrypoint in register `entrypoint`
-static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
-                                                       vixl32::Register entrypoint) {
+static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
  // The register where the read barrier introspection entrypoint is loaded
-  // is fixed: `kBakerCcEntrypointRegister` (R4).
-  DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
+  // is the marking register. We clobber it here and the entrypoint restores it to 1.
+  vixl32::Register entrypoint = mr;
  // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
  DCHECK_EQ(ip.GetCode(), 12u);
  const int32_t entry_point_offset =
      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+  return entrypoint;
 }
 
 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
@@ -9975,8 +9932,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
      __ Bind(&slow_path);
      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
                                 raw_ldr_offset;
-      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
-      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
      if (width == BakerReadBarrierWidth::kWide) {
        MemOperand ldr_half_address(lr, ldr_offset + 2);
        __ Ldrh(ip, ldr_half_address);  // Load the LDR immediate half-word with "Rt | imm12".
@@ -10016,8 +9972,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
      MemOperand ldr_address(lr, ldr_offset + 2);
      __ Ldrb(ip, ldr_address);  // Load the LDR (register) byte with "00 | imm2 | Rm",
                                 // i.e. Rm+32 because the scale in imm2 is 2.
-      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
-      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
      __ Bfi(ep_reg, ip, 3, 6);  // Insert ip to the entrypoint address to create
                                 // a switch case target based on the index register.
      __ Mov(ip, base_reg);      // Move the base register to ip0.
@@ -10050,8 +10005,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
                    " the highest bits and the 'forwarding address' state to have all bits set");
      __ Cmp(ip, Operand(0xc0000000));
      __ B(hs, &forwarding_address);
-      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
-      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
      // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
      // to art_quick_read_barrier_mark_introspection_gc_roots.
      int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
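The thunk-compilation code above reads the original LDR back from the return address to recover which register the reference was loaded into ("Rt | imm12" for the wide form; the narrow form keeps Rt in the low bits — the same fields the assembly below extracts with `lsr #12` and `and #7`). A small sketch of that decoding, assuming the standard ARMv7 Thumb2 encodings T3 (32-bit LDR) and T1 (16-bit LDR):

    #include <cstdint>

    // Wide LDR (32-bit, encoding T3): the second halfword is "Rt | imm12",
    // so Rt lives in bits 12-15.
    constexpr uint32_t RtFromWideLdr(uint16_t second_halfword) {
      return second_halfword >> 12;
    }

    // Narrow LDR (16-bit, encoding T1): "imm5 | Rn | Rt", Rt in bits 0-2.
    constexpr uint32_t RtFromNarrowLdr(uint16_t insn) {
      return insn & 7u;
    }

    // LDR.W r3, [r0, #4] has second halfword 0x3004; LDR r3, [r5] is 0x682b.
    static_assert(RtFromWideLdr(0x3004) == 3u, "wide Rt");
    static_assert(RtFromNarrowLdr(0x682b) == 3u, "narrow Rt");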
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 6b9919ab15..d5b739bd7c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -113,9 +113,6 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
 static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
    arraysize(kRuntimeParameterFpuRegistersVIXL);
 
-// The reserved entrypoint register for link-time generated thunks.
-const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4;
-
 class LoadClassSlowPathARMVIXL;
 class CodeGeneratorARMVIXL;
 
@@ -611,10 +608,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
 
  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
-  // Maybe add the reserved entrypoint register as a temporary for field load. This temp
-  // is added only for AOT compilation if link-time generated thunks for fields are enabled.
-  void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
-
  // Generate a GC root reference load:
  //
  //   root <- *(obj + offset)
@@ -816,7 +809,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
                                             kBitsForBakerReadBarrierWidth>;
 
  static void CheckValidReg(uint32_t reg) {
-    DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg;
+    DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != mr.GetCode()) << reg;
  }
 
  static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 29aecbc097..5287b4b2fa 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1802,8 +1802,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
    // temporary register from the register allocator.
    locations->AddTemp(Location::RequiresRegister());
-    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
-    arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
  }
 }
 
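The EncodeBakerReadBarrier*Data helpers guarded by the CheckValidReg change above pack the registers the link-time thunk needs into the custom data of the BNE patch. A hedged sketch of that kind of packing — the field widths and kind value here are illustrative, not ART's actual constants, but the validity rule mirrors the new CheckValidReg (below IP/r12 and not the marking register r8):

    #include <cstdint>

    // Illustrative layout: kind in bits 8-9, holder reg in 4-7, base reg in 0-3.
    constexpr uint32_t kBaseRegShift = 0;
    constexpr uint32_t kHolderRegShift = 4;
    constexpr uint32_t kKindShift = 8;
    constexpr uint32_t kFieldKind = 1u;

    // Mirrors CheckValidReg: registers must be below IP (r12) and must not be
    // r8 now that rMR is clobbered by the introspection thunks.
    constexpr bool IsValidReg(uint32_t reg) { return reg < 12u && reg != 8u; }

    constexpr uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg) {
      return (kFieldKind << kKindShift) | (holder_reg << kHolderRegShift) |
             (base_reg << kBaseRegShift);
    }

    constexpr uint32_t DecodeBaseReg(uint32_t data) { return data & 0xfu; }

    static_assert(IsValidReg(4) && !IsValidReg(8) && !IsValidReg(12), "reserved regs");
    static_assert(DecodeBaseReg(EncodeFieldData(3, 5)) == 3u, "round trip");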
diff --git a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
index e7b11bd16b..3fe97e146c 100644
--- a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
+++ b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
@@ -625,18 +625,23 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) {
  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
+const uint32_t kBakerValidRegs[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,
+    9, 10, 11,              // r8 (rMR), IP, SP, LR and PC are reserved.
+};
+
+const uint32_t kBakerValidRegsNarrow[] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+};
+
 void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
-  uint32_t valid_regs[] = {
-      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
-      8, 9, 10, 11,         // IP, SP, LR and PC are reserved.
-  };
  DCHECK_ALIGNED(offset, 4u);
  DCHECK_LT(offset, 4 * KB);
  constexpr size_t kMethodCodeSize = 8u;
  constexpr size_t kLiteralOffset = 0u;
  uint32_t method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
-    for (uint32_t holder_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
+    for (uint32_t holder_reg : kBakerValidRegs) {
      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
      const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
      ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -655,8 +660,8 @@ void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref
  // All thunks are at the end.
  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
  method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
-    for (uint32_t holder_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
+    for (uint32_t holder_reg : kBakerValidRegs) {
      ++method_idx;
      uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
@@ -725,20 +730,16 @@ void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref
 }
 
 void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
-  uint32_t valid_regs[] = {
-      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
-      8, 9, 10, 11,         // IP, SP, LR and PC are reserved.
-  };
  DCHECK_ALIGNED(offset, 4u);
  DCHECK_LT(offset, 32u);
  constexpr size_t kMethodCodeSize = 6u;
  constexpr size_t kLiteralOffset = 0u;
  uint32_t method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
    if (base_reg >= 8u) {
      continue;
    }
-    for (uint32_t holder_reg : valid_regs) {
+    for (uint32_t holder_reg : kBakerValidRegs) {
      uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
      const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
      ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -757,11 +758,11 @@ void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t r
  // All thunks are at the end.
  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
  method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
    if (base_reg >= 8u) {
      continue;
    }
-    for (uint32_t holder_reg : valid_regs) {
+    for (uint32_t holder_reg : kBakerValidRegs) {
      ++method_idx;
      uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
      uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
@@ -1021,10 +1022,6 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast
 }
 
 TEST_F(Thumb2RelativePatcherTest, BakerArray) {
-  uint32_t valid_regs[] = {
-      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
-      8, 9, 10, 11,         // IP, SP, LR and PC are reserved.
-  };
  auto ldr = [](uint32_t base_reg) {
    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
    uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
@@ -1033,7 +1030,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
  constexpr size_t kMethodCodeSize = 8u;
  constexpr size_t kLiteralOffset = 0u;
  uint32_t method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
    ++method_idx;
    const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
    ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -1049,7 +1046,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
  // All thunks are at the end.
  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
  method_idx = 0u;
-  for (uint32_t base_reg : valid_regs) {
+  for (uint32_t base_reg : kBakerValidRegs) {
    ++method_idx;
    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
    const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
@@ -1106,14 +1103,10 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
 }
 
 TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
-  uint32_t valid_regs[] = {
-      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
-      8, 9, 10, 11,         // IP, SP, LR and PC are reserved.
-  };
  constexpr size_t kMethodCodeSize = 8u;
  constexpr size_t kLiteralOffset = 4u;
  uint32_t method_idx = 0u;
-  for (uint32_t root_reg : valid_regs) {
+  for (uint32_t root_reg : kBakerValidRegs) {
    ++method_idx;
    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
@@ -1130,7 +1123,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
  // All thunks are at the end.
  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
  method_idx = 0u;
-  for (uint32_t root_reg : valid_regs) {
+  for (uint32_t root_reg : kBakerValidRegs) {
    ++method_idx;
    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
@@ -1165,14 +1158,10 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
 }
 
 TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
-  uint32_t valid_regs[] = {
-      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
-      // Not appplicable to high registers.
-  };
  constexpr size_t kMethodCodeSize = 6u;
  constexpr size_t kLiteralOffset = 2u;
  uint32_t method_idx = 0u;
-  for (uint32_t root_reg : valid_regs) {
+  for (uint32_t root_reg : kBakerValidRegsNarrow) {
    ++method_idx;
    uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
@@ -1189,7 +1178,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
  // All thunks are at the end.
  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
  method_idx = 0u;
-  for (uint32_t root_reg : valid_regs) {
+  for (uint32_t root_reg : kBakerValidRegsNarrow) {
    ++method_idx;
    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
    uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ac17303cf9..7123ae73b4 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -32,8 +32,8 @@
 #define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20
 // The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints,
 // i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0xc0
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xe0
 // The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
 // i.e. art_quick_read_barrier_mark_introspection_arrays.
 #define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
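The constants above must agree with the hand-counted layout of the assembly that follows (the `// @0x..` annotations in its block comment): the wide gc-root section at 0xc0 also holds the wide forwarding-address and slow-path code, while the narrow entrypoint at 0xe0 shares its section with the common runtime call. A sketch of the consistency checks one could write — the offset values are copied from this patch, but the static_assert framing is mine, not ART's:

    #include <cstdint>

    // Values from asm_support_arm.h in this patch.
    constexpr uint32_t kFieldLdrNarrowEntrypointOffset = 0x20;
    constexpr uint32_t kGcRootLdrWideEntrypointOffset = 0xc0;
    constexpr uint32_t kGcRootLdrNarrowEntrypointOffset = 0xe0;
    constexpr uint32_t kArraySwitchOffset = 0x100;

    // The gc-root entrypoints moved from 0x80/0xc0 to 0xc0/0xe0: one 32-byte
    // section per LDR width, and everything stays below the array switch.
    static_assert(kGcRootLdrNarrowEntrypointOffset - kGcRootLdrWideEntrypointOffset == 0x20,
                  "one 32-byte section per LDR width");
    static_assert(kGcRootLdrNarrowEntrypointOffset < kArraySwitchOffset,
                  "gc-root code stays below the array switch cases");
    static_assert(kArraySwitchOffset % 256 == 0, "array switch is .balign 256");

    int main() { return 0; }  // Nothing to run; the checks are compile-time.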
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 0fd239a244..526960b79d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2362,23 +2362,19 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
 
 // Helper macros for Baker CC read barrier mark introspection (BRBMI).
-.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
+.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
    \macro_for_register r0
    \macro_for_register r1
    \macro_for_register r2
    \macro_for_register r3
-    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
+    \macro_for_register r4
    \macro_for_register r5
    \macro_for_register r6
    \macro_for_register r7
-    \macro_for_register r8
+    \macro_for_reserved_register  // r8 (rMR) is the marking register.
    \macro_for_register r9
    \macro_for_register r10
    \macro_for_register r11
-.endm
-
-.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
-    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
    \macro_for_reserved_register  // IP is reserved.
    \macro_for_reserved_register  // SP is reserved.
    \macro_for_reserved_register  // LR is reserved.
@@ -2386,16 +2382,13 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
 .endm
 
 .macro BRBMI_RETURN_SWITCH_CASE reg
+    .balign 8
 .Lmark_introspection_return_switch_case_\reg:
+    mov     rMR, #1
    mov     \reg, ip
    bx      lr
 .endm
 
-.macro BRBMI_BAD_RETURN_SWITCH_CASE
-.Lmark_introspection_return_switch_case_bad:
-    BRBMI_BKPT_FILL_4B
-.endm
-
 .macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
 .endm
@@ -2458,9 +2451,9 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    // If reference is null, just return it in the right register.
    cmp     ip, #0
    beq     .Lmark_introspection_return\label_suffix
-    // Use R4 as temp and check the mark bit of the reference.
-    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    // Use rMR as temp and check the mark bit of the reference.
+    ldr     rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst     rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
    beq     .Lmark_introspection_unmarked\label_suffix
 .Lmark_introspection_return\label_suffix:
 .endm
@@ -2473,7 +2466,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    // the highest bits and the "forwarding address" state to have all bits set.
 #error "Unexpected lock word state shift or forwarding address state value."
 #endif
-    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    cmp     rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
    bhs     .Lmark_introspection_forwarding_address\label_suffix
 .endm
 
@@ -2483,41 +2476,50 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
-    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    lsl     ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b       .Lmark_introspection_return\label_suffix
 .endm
 
 .macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
-    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
+    ldrh    rMR, [lr, #(-1 + \ldr_offset + 2)]
 .endm
 
 .macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
    // Load the 16-bit instruction. Adjust for the thumb state in LR.
-    ldrh    r4, [lr, #(-1 + \ldr_offset)]
+    ldrh    rMR, [lr, #(-1 + \ldr_offset)]
 .endm
 
-.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
-    .balign 64
+.macro BRBMI_EXTRACT_RETURN_REG_wide
+    lsr     rMR, rMR, #12             // Extract `ref_reg`.
+.endm
+
+.macro BRBMI_EXTRACT_RETURN_REG_narrow
+    and     rMR, rMR, #7              // Extract `ref_reg`.
+.endm
+
+.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
+    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
+    BRBMI_EXTRACT_RETURN_REG\label_suffix
+.endm
+
+.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
+    .balign 32
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
-    BRBMI_RUNTIME_CALL
-    // Load the LDR (or the half of it) that contains Rt.
-    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
-    b       .Lmark_introspection_extract_register_and_return\label_suffix
-    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
-    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
-    // the 6 byte forwarding address extraction here across the 32-byte boundary.
-    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
-    // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
-    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
-    // branch) shall take the rest of the 32-byte section (within a cache line).
+    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
+.endm
+
+.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
+    .balign 16
+.Lmark_introspection_unmarked\label_suffix:
+    // Note: Generates exactly 16 bytes of code.
    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
-    BRBMI_RUNTIME_CALL
-    b       .Lmark_introspection_return\label_suffix
+    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
+    b       .Lmark_introspection_runtime_call
 .endm
 
 /*
@@ -2540,9 +2542,12 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
     * not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
-     * the reference into IP using introspection and calls the main entrypoint,
-     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
-     * the passed reference is poisoned.
+     * the reference into IP using introspection and calls the main entrypoint
+     * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
+     * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
+     * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
+     * from the main entrypoint and the thunk adjusts the entrypoint pointer.
+     * With heap poisoning enabled, the passed reference is poisoned.
     *
     * For array accesses with non-constant index, the thunk inserts the bits
     * 0-5 of the LDR instruction to the entrypoint address, effectively
@@ -2560,53 +2565,61 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the gc root entrypoint (a copy of the entrypoint with a different
-     * offset for introspection loads) is located at a known offset (128 bytes,
-     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
-     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
-     * the root register to IP and jumps to the customized entrypoint,
-     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
-     * performs all the fast-path checks, so we need just the slow path.
+     * offset for introspection loads) is located at a known offset (0xc0/0xe0
+     * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
+     * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
+     * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
+     * moves the root register to IP and jumps to the customized entrypoint,
+     * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
+     * The thunk also performs all the fast-path checks, so we need just the
+     * slow path.
     *
     * The code structure is
-     *   art_quick_read_barrier_mark_introspection:
+     *   art_quick_read_barrier_mark_introspection:                   // @0x00
     *     Up to 32 bytes code for main entrypoint fast-path code for fields
     *     (and array elements with constant offset) with LDR encoding T3;
     *     jumps to the switch in the "narrow" entrypoint.
-     *     Padding to 32 bytes if needed.
-     *   art_quick_read_barrier_mark_introspection_narrow:
+     *   art_quick_read_barrier_mark_introspection_narrow:            // @0x20
     *     Up to 48 bytes code for fast path code for fields (and array
     *     elements with constant offset) with LDR encoding T1, ending in the
     *     return switch instruction TBB and the table with switch offsets.
-     *     Padding to 80 bytes if needed.
-     *   .Lmark_introspection_return_switch_case_r0:
-     *     Exactly 48 bytes of code for the return switch cases (12 cases,
-     *     including BKPT for the reserved registers).
-     *     Ends at 128 bytes total.
-     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
-     *     GC root entrypoint code for LDR encoding T3 (28 bytes).
-     *     Forwarding address extraction for LDR encoding T3 (6 bytes).
-     *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
-     *     Ends at 192 bytes total.
-     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
-     *     GC root entrypoint code for LDR encoding T1 (28 bytes).
-     *     Forwarding address extraction for LDR encoding T1 (6 bytes).
-     *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
-     *     Ends at 256 bytes total.
-     *   art_quick_read_barrier_mark_introspection_arrays:
+     *   .Lmark_introspection_return_switch_case_r0:                  // @0x50
+     *     Exactly 88 bytes of code for the return switch cases (8 bytes per
+     *     case, 11 cases; no code for reserved registers).
+     *   .Lmark_introspection_forwarding_address_narrow:              // @0xa8
+     *     Exactly 6 bytes to extract the forwarding address and jump to the
+     *     "narrow" entrypoint fast path.
+     *   .Lmark_introspection_return_switch_case_bad:                 // @0xae
+     *     Exactly 2 bytes, bkpt for unexpected return register.
+     *   .Lmark_introspection_unmarked_narrow:                        // @0xb0
+     *     Exactly 16 bytes for "narrow" entrypoint slow path.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:     // @0xc0
+     *     GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
+     *     extracts the return register and jumps to the runtime call.
+     *   .Lmark_introspection_forwarding_address_wide:                // @0xca
+     *     Exactly 6 bytes to extract the forwarding address and jump to the
+     *     "wide" entrypoint fast path.
+     *   .Lmark_introspection_unmarked_wide:                          // @0xd0
+     *     Exactly 16 bytes for "wide" entrypoint slow path.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:   // @0xe0
+     *     GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
+     *     extracts the return register and falls through to the runtime call.
+     *   .Lmark_introspection_runtime_call:                           // @0xe8
+     *     Exactly 24 bytes for the runtime call to MarkReg() and jump to the
+     *     return switch.
+     *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
     */
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
-    // At this point, IP contains the reference, R4 can be freely used.
-    // (R4 is reserved for the entrypoint address.)
+    // At this point, IP contains the reference, rMR is clobbered by the thunk
+    // and can be freely used as it will be set back to 1 before returning.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
-    // Check for null or marked, lock word is loaded into IP.
+    // Check for null or marked, lock word is loaded into rMR.
    BRBMI_CHECK_NULL_AND_MARKED _wide
-    // Load the half of the instruction that contains Rt.
-    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
-.Lmark_introspection_extract_register_and_return_wide:
-    lsr     r4, r4, #12               // Extract `ref_reg`.
+    // Load and extract the return register from the instruction.
+    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
    b       .Lmark_introspection_return_switch
 
    .balign 32
@@ -2615,25 +2628,45 @@ ENTRY art_quick_read_barrier_mark_introspection
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_narrow, #function
    .hidden art_quick_read_barrier_mark_introspection_narrow
    .global art_quick_read_barrier_mark_introspection_narrow
art_quick_read_barrier_mark_introspection_narrow:
-    // At this point, IP contains the reference, R4 can be freely used.
-    // (R4 is reserved for the entrypoint address.)
+    // At this point, IP contains the reference, rMR is clobbered by the thunk
+    // and can be freely used as it will be set back to 1 before returning.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
-    // Check for null or marked, lock word is loaded into R4.
+    // Check for null or marked, lock word is loaded into rMR.
    BRBMI_CHECK_NULL_AND_MARKED _narrow
-    // Load the 16-bit instruction.
-    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-.Lmark_introspection_extract_register_and_return_narrow:
-    and     r4, r4, #7                // Extract `ref_reg`.
+    // Load and extract the return register from the instruction.
+    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
.Lmark_introspection_return_switch:
-    tbb     [pc, r4]                  // Jump to the switch case.
+    tbb     [pc, rMR]                 // Jump to the switch case.
.Lmark_introspection_return_table:
    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
-    .balign 16
-    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
+    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
+
+    .balign 8
+    BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow  // 6 bytes
+.Lmark_introspection_return_switch_case_bad:
+    bkpt                              // 2 bytes
+
+    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
+
+    // 8 bytes for the loading and extracting of the return register.
+    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
+    // 2 bytes for near branch to the runtime call.
+    b       .Lmark_introspection_runtime_call
+
+    BRBMI_EXTRACT_FORWARDING_ADDRESS _wide  // Not even 4-byte aligned.
+
+    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
+
+    // 8 bytes for the loading and extracting of the return register.
+    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
+    // And the runtime call and branch to the switch taking exactly 24 bytes
+    // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
+    // shall take the rest of the 32-byte section (within a cache line).
+.Lmark_introspection_runtime_call:
+    BRBMI_RUNTIME_CALL
+    b       .Lmark_introspection_return_switch
 
-    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
-    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
 
    .balign 256
    .thumb_func
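Both entrypoints above share the same fast path: null check, mark-bit test on the lock word (now loaded into rMR instead of r4), and a forwarding-address check when the object is unmarked. A C++ sketch of that logic — the 0xc0000000 comparison and the shift-by-3 for 8-byte object alignment match the diff, but the names and the mark-bit position are my assumptions, not ART's headers:

    #include <cassert>
    #include <cstdint>

    struct Object { uint32_t lock_word; };

    constexpr uint32_t kMarkBit = 1u << 29;                    // assumed position
    constexpr uint32_t kForwardingAddressState = 0xc0000000u;  // top 2 bits set
    constexpr uint32_t kForwardingAddressShift = 3;            // 8-byte alignment

    // Stand-in for the BRBMI_RUNTIME_CALL slow path to MarkReg().
    Object* MarkSlowPath(Object* ref) { return ref; }

    Object* MarkIntrospection(Object* ref) {
      if (ref == nullptr) return ref;          // Null: return as-is.
      uint32_t lw = ref->lock_word;            // ldr rMR, [ip, #LOCK_WORD...]
      if ((lw & kMarkBit) != 0) return ref;    // Already marked: fast return.
      if (lw >= kForwardingAddressState) {     // cmp ..., #0xc0000000; bhs ...
        // Shifting left drops the two state bits and restores the address.
        return reinterpret_cast<Object*>(lw << kForwardingAddressShift);
      }
      return MarkSlowPath(ref);                // Runtime call; restores rMR = 1.
    }

    int main() {
      Object marked{kMarkBit};
      assert(MarkIntrospection(&marked) == &marked);
      assert(MarkIntrospection(nullptr) == nullptr);
    }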
diff --git a/runtime/oat.h b/runtime/oat.h
index 01d391401d..0318606f87 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
  static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: Retrieve Class* and String* from .data.bimg.rel.ro .
-  static constexpr uint8_t kOatVersion[] = { '1', '4', '0', '\0' };
+  // Last oat version changed reason: Use rMR as temp in Baker RB introspection marking.
+  static constexpr uint8_t kOatVersion[] = { '1', '4', '1', '\0' };
 
  static constexpr const char* kImageLocationKey = "image-location";
  static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
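Because the thunk/entrypoint register protocol changed, AOT code compiled against the old r4-based scheme must not run on the new runtime, and bumping kOatVersion forces recompilation. A sketch of the kind of check this enables — hypothetical framing; ART's real validation lives inside OatHeader:

    #include <cstdint>
    #include <cstring>

    constexpr uint8_t kOatVersion[] = { '1', '4', '1', '\0' };

    // An oat file recording version 140 (r4-based introspection) is rejected
    // by a runtime expecting 141 (rMR-based introspection).
    bool VersionMatches(const uint8_t (&file_version)[4]) {
      return std::memcmp(file_version, kOatVersion, sizeof(kOatVersion)) == 0;
    }

    int main() {
      const uint8_t old_version[4] = { '1', '4', '0', '\0' };
      return VersionMatches(old_version) ? 1 : 0;  // 0: correctly rejected.
    }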