-rw-r--r-- | compiler/linker/arm64/relative_patcher_arm64.cc | 2
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 75
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 55
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 15
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 21
-rw-r--r-- | runtime/arch/arm/quick_entrypoints_arm.S | 187
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 92
-rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 130
-rw-r--r-- | runtime/arch/x86_64/asm_support_x86_64.S | 4
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 125
-rw-r--r-- | runtime/entrypoints/quick/quick_dexcache_entrypoints.cc | 6
11 files changed, 505 insertions, 207 deletions
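This change converts art_quick_resolve_string on ARM, ARM64, x86 and x86-64 from the kSaveRefsOnly to the kSaveEverything calling convention. Because the kSaveEverything call preserves all registers (with Baker read barriers or no read barriers), the compiler's HLoadString::LoadKind::kBssEntry slow paths can keep the .bss entry address live in a temporary across the runtime call and store the resolved String to it directly, instead of re-materializing the PC-relative address afterwards as the removed TODO comments describe. All four assembly fast paths implement the same string dex-cache probe; the following is a minimal C++ model of that probe, with stand-in names and a placeholder cache size (ART's real constants are STRING_DEX_CACHE_HASH_BITS, STRING_DEX_CACHE_ELEMENT_SIZE and STRING_DEX_CACHE_SIZE_MINUS_ONE), assuming each 64-bit slot packs the string index in the high half and a 32-bit compressed String reference in the low half, which is how the ldrd/movlps/movq sequences in the hunks below read it:

    #include <cstddef>
    #include <cstdint>
    #include <optional>

    // Illustrative model of the string dex-cache probe; stand-in declarations,
    // not ART's real ones.
    constexpr unsigned kHashBits = 10;  // stand-in for STRING_DEX_CACHE_HASH_BITS
    constexpr std::size_t kCacheSize = std::size_t{1} << kHashBits;

    struct StringDexCache {
      // Each 8-byte slot (STRING_DEX_CACHE_ELEMENT_SIZE) packs
      // (string_idx << 32) | 32-bit compressed String reference.
      std::uint64_t slots[kCacheSize];
    };

    // Returns the cached compressed reference on a hit; on a miss the assembly
    // falls through to the slow path, which now builds a kSaveEverything frame
    // and calls artResolveStringFromCode(string_idx, Thread::Current()).
    std::optional<std::uint32_t> LookupString(const StringDexCache& cache,
                                              std::uint32_t string_idx) {
      std::uint64_t slot = cache.slots[string_idx & (kCacheSize - 1)];  // ubfx + scaled load
      if (static_cast<std::uint32_t>(slot >> 32) != string_idx) {       // e.g. cmp x0, x29, lsr #32
        return std::nullopt;
      }
      return static_cast<std::uint32_t>(slot);  // e.g. ubfx x0, x29, #0, #32
    }

On a hit with read barriers enabled, the entrypoints additionally test THREAD_IS_GC_MARKING_OFFSET and, when the GC is marking, forward an unmarked reference by tail-calling into art_quick_read_barrier_mark_reg00. On a miss, they now deliver the pending OutOfMemoryError via the new DELIVER_PENDING_EXCEPTION_FRAME_READY macro if artResolveStringFromCode returns null, and otherwise return through the new RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0/X0/EAX/RAX variants that skip the result register.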
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 3b7788068e..4a9de7f3d1 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -214,7 +214,7 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType(); } else { - // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the + // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString(). DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative || diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 00530d8140..88bfad49c9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -429,33 +429,49 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HLoadString* load = instruction_->AsLoadString(); - const uint32_t string_index = load->GetStringIndex(); + // In the unlucky case that the `temp` is R0, we preserve the address in `out` across + // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call). + bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0)); + Register entry_address = temp_is_r0 ? out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (call_saves_everything_except_r0 && temp_is_r0) { + __ mov(entry_address, ShifterOperand(temp)); + } + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); - RestoreLiveRegisters(codegen, locations); + // Store the resolved String to the .bss entry. + if (call_saves_everything_except_r0) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ str(R0, Address(entry_address)); + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry. 
+ CodeGeneratorARM::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + __ BindTrackedLabel(&labels->movw_label); + __ movw(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(entry_address, entry_address, ShifterOperand(PC)); + __ str(R0, Address(entry_address)); + } - // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. - CodeGeneratorARM::PcRelativePatchInfo* labels = - arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - __ BindTrackedLabel(&labels->movw_label); - __ movw(IP, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->movt_label); - __ movt(IP, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->add_pc_label); - __ add(IP, IP, ShifterOperand(PC)); - __ str(locations->Out().AsRegister<Register>(), Address(IP)); + arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); + RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -5694,10 +5710,25 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(R0)); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + // Note that IP may theoretically be clobbered by saving/restoring the live register + // (only one thanks to the custom calling convention), so we request a different temp. + locations->AddTemp(Location::RequiresRegister()); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. 
+ } + } } } @@ -5733,15 +5764,16 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register temp = locations->GetTemp(0).AsRegister<Register>(); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); __ BindTrackedLabel(&labels->movw_label); - __ movw(out, /* placeholder */ 0u); + __ movw(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->movt_label); - __ movt(out, /* placeholder */ 0u); + __ movt(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->add_pc_label); - __ add(out, out, ShifterOperand(PC)); - GenerateGcRootFieldLoad(load, out_loc, out, 0); + __ add(temp, temp, ShifterOperand(PC)); + GenerateGcRootFieldLoad(load, out_loc, temp, 0); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -5755,6 +5787,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { // TODO: Consider re-adding the compiler code to do string dex cache lookup again. DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex()); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index f02b028541..9e59d8cc38 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -331,13 +331,20 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} + LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label) + : SlowPathCodeARM64(instruction), + temp_(temp), + adrp_label_(adrp_label) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // temp_ is a scratch register. Make sure it's not used for saving/restoring registers. + UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + temps.Exclude(temp_); + __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -352,21 +359,21 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { RestoreLiveRegisters(codegen, locations); // Store the resolved String to the BSS entry. - UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); - Register temp = temps.AcquireX(); const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary - // for the ADRP in the fast path, so that we can avoid the ADRP here. 
- vixl::aarch64::Label* adrp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); - arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp); + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // The string entry page address was preserved in temp_ thanks to kSaveEverything. + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry page. + adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_); + } vixl::aarch64::Label* strp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_); { SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); __ Bind(strp_label); __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), - MemOperand(temp, /* offset placeholder */ 0)); + MemOperand(temp_, /* offset placeholder */ 0)); } __ B(GetExitLabel()); @@ -375,6 +382,9 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: + const Register temp_; + vixl::aarch64::Label* adrp_label_; + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; @@ -4246,11 +4256,24 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); InvokeRuntimeCallingConvention calling_convention; locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); } else { locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), + RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), + Primitive::kPrimNot).GetCode()); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -4285,18 +4308,21 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { const DexFile& dex_file = load->GetDexFile(); uint32_t string_index = load->GetStringIndex(); DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireX(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); - codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + codegen_->EmitAdrpPlaceholder(adrp_label, temp); // Add LDR with its PC-relative String patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(load, load->GetLocations()->Out(), - out.X(), + temp, /* placeholder */ 0u, ldr_label); - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -4308,6 +4334,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex()); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e74e60514d..02c1c3b69f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6048,8 +6048,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || - load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6057,6 +6056,17 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { locations->SetOut(Location::RegisterLocation(EAX)); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -6103,6 +6113,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { // TODO: Re-add the compiler code to do string dex cache lookup again. 
InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex())); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5cabc8fa06..4b64c1b6ff 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -299,9 +299,9 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); + // Custom calling convention: RAX serves as both input and output. + __ movl(CpuRegister(RAX), Immediate(string_index)); x86_64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), @@ -5456,10 +5456,20 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + // Custom calling convention: RAX serves as both input and output. + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(RAX)); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -5499,9 +5509,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { } // TODO: Re-add the compiler code to do string dex cache lookup again. - InvokeRuntimeCallingConvention calling_convention; - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), - Immediate(load->GetStringIndex())); + // Custom calling convention: RAX serves as both input and output. 
+ __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex())); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index cdb4c251a8..bf70c554b1 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -239,6 +239,30 @@ .cfi_adjust_cfa_offset -56 .endm +.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 + add sp, #8 @ rewind sp + .cfi_adjust_cfa_offset -8 + vpop {d0-d15} + .cfi_adjust_cfa_offset -128 + add sp, #4 @ skip r0 + .cfi_adjust_cfa_offset -4 + .cfi_restore r0 @ debugger can no longer restore caller's r0 + pop {r1-r12, lr} @ 13 words of callee saves + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore r9 + .cfi_restore r10 + .cfi_restore r11 + .cfi_restore r12 + .cfi_restore lr + .cfi_adjust_cfa_offset -52 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz r0, 1f @ result non-zero branch over bx lr @ return @@ -252,17 +276,23 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -.macro DELIVER_PENDING_EXCEPTION - .fnend - .fnstart - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY mov r0, r9 @ pass Thread::Current bl artDeliverPendingExceptionFromCode @ artDeliverPendingExceptionFromCode(Thread*) .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. 
+ */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name .extern \cxx_name ENTRY \c_name @@ -1078,41 +1108,71 @@ END art_quick_set64_instance */ ENTRY art_quick_resolve_string - ldr r1, [sp] @ load referrer - ldr r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class - ldr r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache - ubfx r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS - add r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT - ldrd r2, r3, [r1] @ load index into r3 and pointer into r2 - cmp r0, r3 + push {r10-r12, lr} + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset r10, 0 + .cfi_rel_offset r11, 4 + .cfi_rel_offset ip, 8 + .cfi_rel_offset lr, 12 + ldr r10, [sp, #16] @ load referrer + ldr r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class + ldr r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache + ubfx r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS + add r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT + ldrd r10, r11, [r10] @ load index into r11 and pointer into r10 + cmp r0, r11 bne .Lart_quick_resolve_string_slow_path #ifdef USE_READ_BARRIER - ldr r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz r3, .Lart_quick_resolve_string_marking + ldr r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz r0, .Lart_quick_resolve_string_marking +.Lart_quick_resolve_string_no_rb: #endif - mov r0, r2 - bx lr -// Slow path case, the index did not match -.Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC - mov r1, r9 @ pass Thread::Current - mov r3, sp - bl artResolveStringFromCode @ (uint32_t type_idx, Method* method, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + mov r0, r10 + pop {r10-r12, pc} + +#ifdef USE_READ_BARRIER // GC is marking case, need to check the mark bit. .Lart_quick_resolve_string_marking: - ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] - tst r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - mov r0, r2 - bne .Lart_quick_resolve_string_no_rb - push {r1, r2, r3, lr} @ Save x1, LR - .cfi_adjust_cfa_offset 16 - bl artReadBarrierMark @ Get the marked string back. - pop {r1, r2, r3, lr} @ Restore registers. + ldr r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET] + lsrs r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1) + bcs .Lart_quick_resolve_string_no_rb + mov r0, r10 + .cfi_remember_state + pop {r10-r12, lr} .cfi_adjust_cfa_offset -16 -.Lart_quick_resolve_string_no_rb: + .cfi_restore r10 + .cfi_restore r11 + .cfi_restore r12 + .cfi_restore lr + // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not. + b .Lslow_rb_art_quick_read_barrier_mark_reg00 @ Get the marked string back. + .cfi_restore_state +#endif + +// Slow path case, the index did not match +.Lart_quick_resolve_string_slow_path: + push {r0-r9} @ 10 words of callee saves and args; {r10-r12, lr} already saved. + .cfi_adjust_cfa_offset 40 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r4, 16 + .cfi_rel_offset r5, 20 + .cfi_rel_offset r6, 24 + .cfi_rel_offset r7, 28 + .cfi_rel_offset r8, 32 + .cfi_rel_offset r9, 36 + SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1 @ save callee saves in case of GC + mov r1, r9 @ pass Thread::Current + bl artResolveStringFromCode @ (uint32_t type_idx, Thread*) + cbz r0, 1f @ If result is null, deliver the OOME. 
+ .cfi_remember_state + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 bx lr + .cfi_restore_state +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. @@ -1920,6 +1980,8 @@ END art_quick_l2f * getting its argument and returning its result through register * `reg`, saving and restoring all caller-save registers. * + * IP is clobbered; `reg` must not be IP. + * * If `reg` is different from `r0`, the generated function follows a * non-standard runtime calling convention: * - register `reg` is used to pass the (sole) argument of this @@ -1936,36 +1998,71 @@ ENTRY \name SMART_CBZ \reg, .Lret_rb_\name // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked. ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] - ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED beq .Lslow_rb_\name // Already marked, return right away. +.Lret_rb_\name: bx lr .Lslow_rb_\name: - push {r0-r5, r9, lr} @ save return address and core caller-save registers - @ also save callee save r5 for 16 byte alignment + // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here. + push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip .cfi_adjust_cfa_offset 32 .cfi_rel_offset r0, 0 .cfi_rel_offset r1, 4 .cfi_rel_offset r2, 8 .cfi_rel_offset r3, 12 .cfi_rel_offset r4, 16 - .cfi_rel_offset r5, 20 - .cfi_rel_offset r9, 24 + .cfi_rel_offset r9, 20 + .cfi_rel_offset ip, 24 .cfi_rel_offset lr, 28 - vpush {s0-s15} @ save floating-point caller-save registers - .cfi_adjust_cfa_offset 64 .ifnc \reg, r0 mov r0, \reg @ pass arg1 - obj from `reg` .endif + + vpush {s0-s15} @ save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) - mov ip, r0 @ Save result in IP vpop {s0-s15} @ restore floating-point registers .cfi_adjust_cfa_offset -64 - pop {r0-r5, r9, lr} @ restore caller-save registers - mov \reg, ip @ copy result to reg -.Lret_rb_\name: + + .ifc \reg, r0 @ Save result to the stack slot or destination register. + str r0, [sp, #0] + .else + .ifc \reg, r1 + str r0, [sp, #4] + .else + .ifc \reg, r2 + str r0, [sp, #8] + .else + .ifc \reg, r3 + str r0, [sp, #12] + .else + .ifc \reg, r4 + str r0, [sp, #16] + .else + .ifc \reg, r9 + str r0, [sp, #20] + .else + mov \reg, r0 + .endif + .endif + .endif + .endif + .endif + .endif + + pop {r0-r4, r9, ip, lr} @ restore caller-save registers + .cfi_adjust_cfa_offset -32 + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r4 + .cfi_restore r9 + .cfi_restore ip + .cfi_restore lr bx lr END \name .endm diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 04a3cc6cae..483cee3100 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -337,7 +337,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 // Restore FP registers. // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned. ldr d0, [sp, #8] @@ -359,7 +359,6 @@ ldr d31, [sp, #256] // Restore core registers. 
- RESTORE_REG x0, 264 RESTORE_TWO_REGS x1, x2, 272 RESTORE_TWO_REGS x3, x4, 288 RESTORE_TWO_REGS x5, x6, 304 @@ -379,6 +378,11 @@ DECREASE_FRAME 512 .endm +.macro RESTORE_SAVE_EVERYTHING_FRAME + RESTORE_REG x0, 264 + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz x0, 1f // result non-zero branch over ret // return @@ -392,11 +396,10 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY mov x0, xSELF // Point of no return. @@ -404,6 +407,15 @@ brk 0 // Unreached .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET] // Get exception field. cbnz \reg, 1f @@ -1638,40 +1650,54 @@ END art_quick_set64_static */ ENTRY art_quick_resolve_string - ldr x1, [sp] // load referrer - ldr w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class - ldr x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache - ubfx x2, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into x2 - ldr x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2 - cmp x0, x2, lsr #32 // compare against upper 32 bits + SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ + ldr x29, [sp, #(2 * __SIZEOF_POINTER__)] // load referrer + ldr w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class + ldr x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache + ubfx lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into LR + ldr x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x29 + cmp x0, x29, lsr #32 // compare against upper 32 bits bne .Lart_quick_resolve_string_slow_path - ubfx x0, x2, #0, #32 // extract lower 32 bits into x0 + ubfx x0, x29, #0, #32 // extract lower 32 bits into x0 #ifdef USE_READ_BARRIER // Most common case: GC is not marking. - ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz x3, .Lart_quick_resolve_string_marking + ldr w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz x29, .Lart_quick_resolve_string_marking +.Lart_quick_resolve_string_no_rb: #endif + .cfi_remember_state + RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ ret + .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598 + +#ifdef USE_READ_BARRIER +// GC is marking case, need to check the mark bit. +.Lart_quick_resolve_string_marking: + ldr x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tbnz x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb + .cfi_remember_state + RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ + // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not. + b .Lslow_rb_art_quick_read_barrier_mark_reg00 // Get the marked string back. 
+ .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598 +#endif // Slow path case, the index did not match. .Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC + INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__) + SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR // save callee saves in case of GC mov x1, xSELF // pass Thread::Current bl artResolveStringFromCode // (int32_t string_idx, Thread* self) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -// GC is marking case, need to check the mark bit. -.Lart_quick_resolve_string_marking: - ldr x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - tbnz x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb - // Save LR so that we can return, also x1 for alignment purposes. - SAVE_TWO_REGS_INCREASE_FRAME x1, xLR, 16 // Save x1, LR. - bl artReadBarrierMark // Get the marked string back. - RESTORE_TWO_REGS_DECREASE_FRAME x1, xLR, 16 // Restore registers. -.Lart_quick_resolve_string_no_rb: - ret - + cbz w0, 1f // If result is null, deliver the OOME. + .cfi_remember_state + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 + ret // return + .cfi_restore_state + .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. @@ -2513,9 +2539,10 @@ ENTRY \name */ // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler. ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name + tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name +.Lret_rb_\name: ret -.Lslow_path_rb_\name: +.Lslow_rb_\name: // Save all potentially live caller-save core registers. SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368 SAVE_TWO_REGS x2, x3, 16 @@ -2580,7 +2607,6 @@ ENTRY \name // Restore return address and remove padding. RESTORE_REG xLR, 360 DECREASE_FRAME 368 -.Lret_rb_\name: ret END \name .endm diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 7bb59efdbf..f4f9a68e30 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -224,12 +224,11 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) - * when EDI is already saved. + * when EDI and ESI are already saved. */ -MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg) +MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg) // Save core registers from highest to lowest to agree with core spills bitmap. - // EDI, or at least a placeholder for it, is already on the stack. - PUSH esi + // EDI and ESI, or at least placeholders for them, are already on the stack. PUSH ebp PUSH ebx PUSH edx @@ -268,13 +267,25 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) + * when EDI is already saved. + */ +MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg) + // Save core registers from highest to lowest to agree with core spills bitmap. + // EDI, or at least a placeholder for it, is already on the stack. 
+ PUSH esi + SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg) +END_MACRO + + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) */ MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg) PUSH edi SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg) END_MACRO -MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS) // Restore FPRs. Method and padding is still on the stack. movsd 16(%esp), %xmm0 movsd 24(%esp), %xmm1 @@ -284,13 +295,10 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) movsd 56(%esp), %xmm5 movsd 64(%esp), %xmm6 movsd 72(%esp), %xmm7 +END_MACRO - // Remove save everything callee save method, stack alignment padding and FPRs. - addl MACRO_LITERAL(16 + 8 * 8), %esp - CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8)) - - // Restore core registers. - POP eax +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX) + // Restore core registers (except eax). POP ecx POP edx POP ebx @@ -299,12 +307,32 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) POP edi END_MACRO +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs. + addl MACRO_LITERAL(16 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8)) + + POP eax + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs, skip EAX. + addl MACRO_LITERAL(16 + 8 * 8 + 4), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8 + 4)) + + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX +END_MACRO + /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_. + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -MACRO0(DELIVER_PENDING_EXCEPTION) - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw +MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) // Outgoing argument set up subl MACRO_LITERAL(12), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(12) @@ -314,6 +342,15 @@ MACRO0(DELIVER_PENDING_EXCEPTION) UNREACHABLE END_MACRO + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +MACRO0(DELIVER_PENDING_EXCEPTION) + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_MACRO + MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) DEFINE_FUNCTION VAR(c_name) SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context @@ -1114,26 +1151,42 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab END_FUNCTION art_quick_alloc_object_region_tlab DEFINE_FUNCTION art_quick_resolve_string - movl 4(%esp), %ecx // get referrer - movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx // get declaring class - movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache - movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx - andl %eax, %edx - movlps (%ecx, %edx, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0 - movd %xmm0, %ecx // extract pointer + PUSH edi + PUSH esi + // Save xmm0 at an aligned address on the stack. 
+ subl MACRO_LITERAL(12), %esp + CFI_ADJUST_CFA_OFFSET(12) + movsd %xmm0, 0(%esp) + movl 24(%esp), %edi // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi // get declaring class + movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi // get string dex cache + movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi + andl %eax, %esi + movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0 + movd %xmm0, %edi // extract pointer pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits - movd %xmm0, %edx // extract index - cmp %edx, %eax + movd %xmm0, %esi // extract index + // Restore xmm0 and remove it together with padding from the stack. + movsd 0(%esp), %xmm0 + addl MACRO_LITERAL(12), %esp + CFI_ADJUST_CFA_OFFSET(-12) + cmp %esi, %eax jne .Lart_quick_resolve_string_slow_path - movl %ecx, %eax + movl %edi, %eax + CFI_REMEMBER_STATE + POP esi + POP edi #ifdef USE_READ_BARRIER cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_resolve_string_marking + jne .Lnot_null_art_quick_read_barrier_mark_reg00 #endif ret + CFI_RESTORE_STATE + CFI_DEF_CFA(esp, 24) // workaround for clang bug: 31975598 + .Lart_quick_resolve_string_slow_path: // Outgoing argument set up - SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx + SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx subl LITERAL(8), %esp // push padding CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() @@ -1142,21 +1195,15 @@ DEFINE_FUNCTION art_quick_resolve_string call SYMBOL(artResolveStringFromCode) addl LITERAL(16), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -.Lart_quick_resolve_string_marking: - SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) - jnz .Lart_quick_resolve_string_no_rb - subl LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - PUSH eax // Pass the string as the first param. - call SYMBOL(artReadBarrierMark) - addl LITERAL(16), %esp - CFI_ADJUST_CFA_OFFSET(-16) -.Lart_quick_resolve_string_no_rb: - RESTORE_SAVE_REFS_ONLY_FRAME + testl %eax, %eax // If result is null, deliver the OOME. + jz 1f + CFI_REMEMBER_STATE + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX ret + CFI_RESTORE_STATE + CFI_DEF_CFA(esp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END_FUNCTION art_quick_resolve_string ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER @@ -2102,6 +2149,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) // Null check so that we can load the lock word. test REG_VAR(reg), REG_VAR(reg) jz .Lret_rb_\name +.Lnot_null_\name: // Check the mark bit, if it is 1 return. testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) jz .Lslow_rb_\name diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index af4a6c4f99..28018c5f24 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -76,6 +76,8 @@ #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg #define CFI_RESTORE(reg) .cfi_restore reg #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size + #define CFI_RESTORE_STATE .cfi_restore_state + #define CFI_REMEMBER_STATE .cfi_remember_state #else // Mac OS' doesn't like cfi_* directives. 
#define CFI_STARTPROC @@ -85,6 +87,8 @@ #define CFI_DEF_CFA_REGISTER(reg) #define CFI_RESTORE(reg) #define CFI_REL_OFFSET(reg,size) + #define CFI_RESTORE_STATE + #define CFI_REMEMBER_STATE #endif // Symbols. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index c3321e17b9..afa1c0ff03 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -263,16 +263,15 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) - * when R15 is already saved. + * when R14 and R15 are already saved. */ -MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED) +MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED) #if defined(__APPLE__) int3 int3 #else // Save core registers from highest to lowest to agree with core spills bitmap. - // R15, or at least a placeholder for it, is already on the stack. - PUSH r14 + // R14 and R15, or at least placeholders for them, are already on the stack. PUSH r13 PUSH r12 PUSH r11 @@ -326,13 +325,23 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) + * when R15 is already saved. + */ +MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED) + PUSH r14 + SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED +END_MACRO + + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) */ MACRO0(SETUP_SAVE_EVERYTHING_FRAME) PUSH r15 SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED END_MACRO -MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS) // Restore FPRs. Method and padding is still on the stack. movq 16(%rsp), %xmm0 movq 24(%rsp), %xmm1 @@ -350,12 +359,10 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) movq 120(%rsp), %xmm13 movq 128(%rsp), %xmm14 movq 136(%rsp), %xmm15 +END_MACRO - // Remove save everything callee save method, stack alignment padding and FPRs. - addq MACRO_LITERAL(16 + 16 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8)) - // Restore callee and GPR args, mixed together to agree with core spills bitmap. - POP rax +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX) + // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap. POP rcx POP rdx POP rbx @@ -372,19 +379,47 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) POP r15 END_MACRO +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs. + addq MACRO_LITERAL(16 + 16 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8)) + + POP rax + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX. + addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8)) + + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX +END_MACRO /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_. + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. 
*/ -MACRO0(DELIVER_PENDING_EXCEPTION) - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw +MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) // (Thread*) setup movq %gs:THREAD_SELF_OFFSET, %rdi call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*) UNREACHABLE END_MACRO + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +MACRO0(DELIVER_PENDING_EXCEPTION) + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_MACRO + MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) DEFINE_FUNCTION VAR(c_name) SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context @@ -1295,45 +1330,48 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab END_FUNCTION art_quick_alloc_object_initialized_region_tlab DEFINE_FUNCTION art_quick_resolve_string - movq 8(%rsp), %rcx // get referrer - movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx // get declaring class - movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache - movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx - andq %rdi, %rdx - movq (%rcx, %rdx, STRING_DEX_CACHE_ELEMENT_SIZE), %rdx - movl %edx, %eax - shrq LITERAL(32), %rdx - cmp %rdx, %rdi + // Custom calling convention: RAX serves as both input and output. + PUSH r15 + PUSH r14 + movq 24(%rsp), %r15 // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d // get declaring class + movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15 // get string dex cache + movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d + andl %eax, %r14d + movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14 + movl %r14d, %r15d + shrq LITERAL(32), %r14 + cmpl %r14d, %eax jne .Lart_quick_resolve_string_slow_path + movl %r15d, %eax + CFI_REMEMBER_STATE + POP r14 + POP r15 #ifdef USE_READ_BARRIER cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_resolve_string_marking + jne .Lnot_null_art_quick_read_barrier_mark_reg00 #endif ret -// Slow path, the index did not match + CFI_RESTORE_STATE + CFI_DEF_CFA(rsp, 24) // workaround for clang bug: 31975598 + +// Slow path, the index did not match. .Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME - movq %rcx, %rax + SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED // Outgoing argument set up + movl %eax, %edi // pass string index movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() call SYMBOL(artResolveStringFromCode) // artResolveStringFromCode(arg0, referrer, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -// GC is marking case, need to check the mark bit. -.Lart_quick_resolve_string_marking: - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax) - jnz .Lart_quick_resolve_string_no_rb - // Save LR so that we can return, also x1 for alignment purposes - PUSH rdi - PUSH rsi - subq LITERAL(8), %rsp // 16 byte alignment - movq %rax, %rdi - call SYMBOL(artReadBarrierMark) - addq LITERAL(8), %rsp - POP rsi - POP rdi -.Lart_quick_resolve_string_no_rb: + + testl %eax, %eax // If result is null, deliver the OOME. 
+ jz 1f + CFI_REMEMBER_STATE + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX // restore frame up to return address ret + CFI_RESTORE_STATE + CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END_FUNCTION art_quick_resolve_string ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER @@ -2230,6 +2268,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) // Null check so that we can load the lock word. testq REG_VAR(reg), REG_VAR(reg) jz .Lret_rb_\name +.Lnot_null_\name: // Check the mark bit, if it is 1 return. testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) jz .Lslow_rb_\name diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 2a3a6bfa06..4d47b83185 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -60,7 +60,11 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly); + auto* caller = GetCalleeSaveMethodCaller( + self, + // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything. + (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly + : Runtime::kSaveEverything); mirror::String* result = ResolveStringFromCode(caller, string_idx); if (LIKELY(result != nullptr)) { // For AOT code, we need a write barrier for the class loader that holds |