author     2021-10-13 15:39:37 +0000
committer  2021-11-01 08:31:54 +0000
commit     2d4feeb67912d64b9e980e6687794826a5c22f9d (patch)
tree       7ab2071bbf5d5907d205b8e2a092ea9869974ba1
parent     60abdd9c89525a277d75df19ff2792614651e1ff (diff)
Add support for calling entry / exit hooks directly from JIT code
The idea of this CL is to avoid maintaining the instrumentation stack
and manipulating the return addresses on the stack in order to call the
entry / exit hooks. This CL only addresses this for JITed code. In
follow-up CLs, we will extend this to the other code paths (native,
nterp). Once we have everything in place we can remove the complexity
of the instrumentation stack.
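
A minimal sketch (not part of the patch) of the control flow this CL moves into
JITed code: the compiled method itself calls the entry hook on entry and the exit
hook just before returning, so the runtime no longer has to rewrite return
addresses to an instrumentation exit stub or track them on an instrumentation
stack. MethodEntryHook / MethodExitHook below are stand-ins for the
kQuickMethodEntryHook / kQuickMethodExitHook runtime entrypoints, not ART APIs.

```cpp
#include <cstdint>
#include <cstdio>

namespace sketch {

void MethodEntryHook() {
  std::puts("entry hook: notify listeners, maybe request deoptimization");
}

// Returns non-zero if an exception became pending in the hook; the assembly stubs
// branch to exception delivery in that case.
int MethodExitHook(uint64_t* gpr_result) {
  std::printf("exit hook: return value = %llu\n",
              static_cast<unsigned long long>(*gpr_result));
  return 0;
}

uint64_t CompiledMethod() {
  MethodEntryHook();                   // emitted for HMethodEntryHook in the entry block
  uint64_t result = 42;                // ... method body ...
  if (MethodExitHook(&result) != 0) {  // emitted for HMethodExitHook before HReturn
    return 0;                          // exception path: deliver pending exception
  }
  return result;
}

}  // namespace sketch

int main() { return sketch::CompiledMethod() == 42 ? 0 : 1; }
```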
This CL introduces new nodes (HMethodEntryHook / HMethodExitHook) that
generate code to call the trace entry / exit hooks when
instrumentation stubs are installed. Currently these are only introduced
for JITed code in debuggable mode. The entry / exit hooks do roughly the
same thing as the instrumentation entry / exit points.
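
A rough C++ equivalent, for illustration only, of the inline check these nodes
emit (compare the ARM64 GenerateMethodEntryExitHook in the diff below): load the
flag at Instrumentation::NeedsEntryExitHooksOffset() from the global
Instrumentation object and branch to a slow path that invokes the runtime hook
only when it is set. The types and the RuntimeHook callback here are assumed
names, not ART APIs.

```cpp
#include <cstdint>

namespace sketch {

struct Instrumentation {
  // Mirrors the field whose offset the patch exposes via NeedsEntryExitHooksOffset().
  uint16_t instrumentation_stubs_installed_ = 0;
};

using RuntimeHook = void (*)();  // stands in for kQuickMethodEntryHook / kQuickMethodExitHook

inline void MethodHookCheck(const Instrumentation* instr, RuntimeHook slow_path) {
  // Fast path: a single load and compare, as in `Ldrh value, [temp]; Cbnz value, slow_path`.
  if (instr->instrumentation_stubs_installed_ != 0) {
    slow_path();  // Slow path saves live registers and calls the runtime entrypoint.
  }
}

}  // namespace sketch
```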
We also extend JITed frames with a ShouldDeoptimize slot. This slot is
used to force deoptimization of frames when requested by jvmti (for
example, during structural class redefinition).
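
A minimal sketch, based on the DeoptimizeFlagValue bits added in
runtime/deoptimization_kind.h, of how the per-frame ShouldDeoptimize slot is used
as a bit mask: CHA invalidation and debugger-requested deoptimization set
different bits, and only the kDebug bit forces a deopt on method exit. The
SetFlag / NeedsDebugDeopt helpers are illustrative, not ART functions.

```cpp
#include <cstdint>

enum class DeoptimizeFlagValue : uint8_t {
  kCHA = 0b01,    // set when class-hierarchy-analysis assumptions are invalidated
  kDebug = 0b10,  // set when jvmti requests deoptimization (e.g. structural redefinition)
  kAll = kCHA | kDebug
};

inline void SetFlag(uint8_t* should_deoptimize_slot, DeoptimizeFlagValue value) {
  *should_deoptimize_slot |= static_cast<uint8_t>(value);
}

inline bool NeedsDebugDeopt(uint8_t should_deoptimize_slot) {
  // Mirrors ShouldDeoptimizeMethod(): the kCHA bit is handled by the
  // HShouldDeoptimizeFlag check in JITed code, not here.
  return (should_deoptimize_slot & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0;
}
```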
Test: art/testrunner.py
Change-Id: Id4aa439731d214a8d2b820a67e75415ca1d5424e
35 files changed, 851 insertions, 145 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 74efc9ea8d..d455614cfd 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -822,6 +822,31 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); }; +class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction) + : SlowPathCodeARM64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) { @@ -1113,6 +1138,47 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) { codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } +void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + DataType::Type return_type = method_hook->InputAt(0)->GetType(); + locations->SetInAt(0, ARM64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) { + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + Register value = temps.AcquireW(); + + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ Mov(temp, address + offset); + __ Ldrh(value, MemOperand(temp, 0)); + __ Cbnz(value, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { MacroAssembler* masm = GetVIXLAssembler(); if 
(GetCompilerOptions().CountHotnessInCompiledCode()) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index d4546e5bd5..750151aa24 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -388,6 +388,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateIntRemForConstDenom(HRem *instruction); void GenerateIntRemForPower2Denom(HRem *instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateMethodEntryExitHook(HInstruction* instruction); // Helpers to set up locations for vector memory operations. Returns the memory operand and, // if used, sets the output parameter scratch to a temporary register used in this operand, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 700202ba20..bf0c77da57 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -971,6 +971,31 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL); }; +class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL); +}; + inline vixl32::Condition ARMCondition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -2111,6 +2136,44 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() { } } +void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType())); +} + +void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + + SlowPathCodeARMVIXL* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); + __ Mov(temp, address + offset); + __ Ldrh(temp, MemOperand(temp, 0)); + __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + 
DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(GetVIXLAssembler()); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index b797c30a39..aa40755b29 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -431,6 +431,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateMethodEntryExitHook(HInstruction* instruction); vixl::aarch32::MemOperand VecAddress( HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c49b08ba69..a04b4129a8 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -942,6 +942,30 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); }; +class MethodEntryExitHooksSlowPathX86 : public SlowPathCode { + public: + explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86); +}; + #undef __ // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT @@ -1097,6 +1121,70 @@ static dwarf::Reg DWARFReg(Register reg) { return dwarf::Reg::X86Core(static_cast<int>(reg)); } +void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) { + switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + break; + + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX)); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); + break; + + case DataType::Type::kVoid: + locations->SetInAt(0, Location::NoLocation()); + break; + + default: + LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); + } +} + +void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + SetInForReturnValue(method_hook, locations); +} + +void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) { + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ cmpw(Address::Absolute(address + offset), Immediate(0)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { Register reg = EAX; @@ -2408,31 +2496,7 @@ void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNU void LocationsBuilderX86::VisitReturn(HReturn* ret) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RegisterLocation(EAX)); - break; - - case DataType::Type::kInt64: - locations->SetInAt( - 0, Location::RegisterPairLocation(EAX, EDX)); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - 0, 
Location::FpuRegisterLocation(XMM0)); - break; - - default: - LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); - } + SetInForReturnValue(ret, locations); } void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 94f010e598..75c5cebb5e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -344,6 +344,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); + void GenerateMethodEntryExitHook(HInstruction* instruction); + X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dae2ae2b84..4ec2dd7a27 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -965,6 +965,31 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); }; +class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode { + public: + explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction) + : SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64); +}; + #undef __ // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT @@ -1494,6 +1519,68 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } +void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) { + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ movq(CpuRegister(TMP), Immediate(address + offset)); + __ cmpw(Address(CpuRegister(TMP), 0), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) { + switch (instr->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RegisterLocation(RAX)); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); + break; + + case DataType::Type::kVoid: + locations->SetInAt(0, Location::NoLocation()); + break; + + default: + LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType(); + } +} + +void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + SetInForReturnValue(method_hook, locations); +} + +void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { NearLabel overflow; @@ -2542,26 +2629,7 @@ void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RegisterLocation(RAX)); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); - break; - - default: - LOG(FATAL) << "Unexpected return 
type " << ret->InputAt(0)->GetType(); - } + SetInForReturnValue(ret, locations); } void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 3e601bb97a..1115c8379d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -276,6 +276,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + void GenerateMethodEntryExitHook(HInstruction* instruction); // Generate a heap reference load using one register `out`: // diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 3abbbae573..c7426828cb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -812,6 +812,11 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HBasicBlock* bb_cursor) { HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetAllocator()) HShouldDeoptimizeFlag(graph_->GetAllocator(), dex_pc); + // ShouldDeoptimizeFlag is used to perform a deoptimization because of a CHA + // invalidation or for debugging reasons. It is OK to just check for non-zero + // value here instead of the specific CHA value. When a debugging deopt is + // requested we deoptimize before we execute any code and hence we shouldn't + // see that case here. HInstruction* compare = new (graph_->GetAllocator()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); HInstruction* deopt = new (graph_->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 390a2bb0be..ed760f190d 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -372,6 +372,9 @@ bool HInstructionBuilder::Build() { if (current_block_->IsEntryBlock()) { InitializeParameters(); AppendInstruction(new (allocator_) HSuspendCheck(0u)); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodEntryHook(0u)); + } AppendInstruction(new (allocator_) HGoto(0u)); continue; } else if (current_block_->IsExitBlock()) { @@ -822,10 +825,18 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, compilation_stats_, MethodCompilationStat::kConstructorFenceGeneratedFinal); } + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + // Return value is not used for void functions. We pass NullConstant to + // avoid special cases when generating code. 
+ AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), dex_pc)); + } AppendInstruction(new (allocator_) HReturnVoid(dex_pc)); } else { DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(value, dex_pc)); + } AppendInstruction(new (allocator_) HReturn(value, dex_pc)); } current_block_ = nullptr; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 17080f0056..24786931f2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2913,7 +2913,10 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } else if (current->IsCurrentMethod()) { replacement = outer_graph->GetCurrentMethod(); } else { - DCHECK(current->IsGoto() || current->IsSuspendCheck()); + // It is OK to ignore MethodEntryHook for inlined functions. + // In debug mode we don't inline and in release mode method + // tracing is best effort so OK to ignore them. + DCHECK(current->IsGoto() || current->IsSuspendCheck() || current->IsMethodEntryHook()); entry_block_->RemoveInstruction(current); } if (replacement != nullptr) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 06fb88e837..978e7c419e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -21,6 +21,7 @@ #include <array> #include <type_traits> +#include "art_method.h" #include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/arena_containers.h" @@ -32,7 +33,6 @@ #include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" -#include "art_method.h" #include "block_namer.h" #include "class_root.h" #include "compilation_kind.h" @@ -680,7 +680,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } bool HasShouldDeoptimizeFlag() const { - return number_of_cha_guards_ != 0; + return number_of_cha_guards_ != 0 || debuggable_; } bool HasTryCatch() const { return has_try_catch_; } @@ -1530,6 +1530,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LongConstant, Constant) \ M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(MethodEntryHook, Instruction) \ + M(MethodExitHook, Instruction) \ M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ @@ -2991,6 +2993,38 @@ class HExpression<0> : public HInstruction { friend class SsaBuilder; }; +class HMethodEntryHook : public HExpression<0> { + public: + explicit HMethodEntryHook(uint32_t dex_pc) + : HExpression(kMethodEntryHook, SideEffects::All(), dex_pc) {} + + bool NeedsEnvironment() const override { + return true; + } + + DECLARE_INSTRUCTION(MethodEntryHook); + + protected: + DEFAULT_COPY_CONSTRUCTOR(MethodEntryHook); +}; + +class HMethodExitHook : public HExpression<1> { + public: + HMethodExitHook(HInstruction* value, uint32_t dex_pc) + : HExpression(kMethodExitHook, SideEffects::All(), dex_pc) { + SetRawInputAt(0, value); + } + + bool NeedsEnvironment() const override { + return true; + } + + DECLARE_INSTRUCTION(MethodExitHook); + + protected: + DEFAULT_COPY_CONSTRUCTOR(MethodExitHook); +}; + // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. 
class HReturnVoid final : public HExpression<0> { diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index 7bcff2bafc..0d7e0e5833 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -505,7 +505,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(64U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(4U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(171 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/openjdkjvmti/deopt_manager.cc b/openjdkjvmti/deopt_manager.cc index bf1b4f0714..cf28a71932 100644 --- a/openjdkjvmti/deopt_manager.cc +++ b/openjdkjvmti/deopt_manager.cc @@ -492,7 +492,12 @@ void DeoptManager::DeoptimizeThread(art::Thread* target) { art::gc::GcCause::kGcCauseDebugger, art::gc::CollectorType::kCollectorTypeDebugger); art::ScopedSuspendAll ssa("Instrument thread stack"); - art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(target); + // Prepare the stack so methods can be deoptimized as and when required. + // This by itself doesn't cause any methods to deoptimize but enables + // deoptimization on demand. + art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack( + target, + /* deopt_all_frames= */ false); } extern DeoptManager* gDeoptManager; diff --git a/openjdkjvmti/ti_heap.cc b/openjdkjvmti/ti_heap.cc index 27fed282aa..bd9d2ddd08 100644 --- a/openjdkjvmti/ti_heap.cc +++ b/openjdkjvmti/ti_heap.cc @@ -1780,7 +1780,7 @@ static void ReplaceStrongRoots(art::Thread* self, const ObjectMap& map) // already have. // TODO We technically only need to do this if the frames are not already being interpreted. // The cost for doing an extra stack walk is unlikely to be worth it though. - instr->InstrumentThreadStack(t); + instr->InstrumentThreadStack(t, /* deopt_all_frames= */ true); } } } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index f5f127472e..5ef1d3e17a 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -2525,3 +2525,36 @@ ENTRY art_quick_compile_optimized // artCompileOptimized doesn't allow thread suspension. blx lr END art_quick_compile_optimized + +// On entry, method is at the bottom of the stack. +ENTRY art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME r0 + ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod + mov r1, rSELF @ pass Thread::Current + bl artMethodEntryHook @ (ArtMethod*, Thread*) + RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + blx lr +END art_quick_method_entry_hook + +ENTRY art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME r2 + + add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame + add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame + ldr r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod* + mov r0, rSELF @ pass Thread::Current + blx artMethodExitHook @ (Thread*, ArtMethod*, gpr_res*, fpr_res*) + + .cfi_remember_state + cbnz r0, .Ldo_deliver_instrumentation_exception_exit @ Deliver exception + + // Normal return. 
+ RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + blx lr +.Ldo_deliver_instrumentation_exception_exit: + .cfi_restore_state + .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END art_quick_method_exit_hook diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 022a0e4053..e5dbeda42d 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -2630,3 +2630,40 @@ ENTRY art_quick_compile_optimized // artCompileOptimized doesn't allow thread suspension. ret END art_quick_compile_optimized + + .extern artMethodEntryHook +ENTRY art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME + + ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod* + mov x1, xSELF // pass Thread::Current + bl artMethodEntryHook // (ArtMethod*, Thread*) + + RESTORE_SAVE_EVERYTHING_FRAME // Note: will restore xSELF + REFRESH_MARKING_REGISTER + ret +END art_quick_method_entry_hook + + .extern artMethodExitHook +ENTRY art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME + + add x3, sp, #16 // floating-point result ptr in kSaveEverything frame + add x2, sp, #272 // integer result ptr in kSaveEverything frame + ldr x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // ArtMethod* + mov x0, xSELF // Thread::Current + bl artMethodExitHook // (Thread*, ArtMethod*, gpr_res*, fpr_res*) + + .cfi_remember_state + cbnz x0, .Ldo_deliver_instrumentation_exception_exit // Handle exception + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + ret +.Ldo_deliver_instrumentation_exception_exit: + .cfi_restore_state + .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END art_quick_method_exit_hook + diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index cda98d2921..2f6af4f5de 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -2381,3 +2381,62 @@ DEFINE_FUNCTION art_quick_compile_optimized RESTORE_SAVE_EVERYTHING_FRAME ret END_FUNCTION art_quick_compile_optimized + +DEFINE_FUNCTION art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME edx + mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod + subl LITERAL(8), %esp + CFI_ADJUST_CFA_OFFSET(8) + + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). + CFI_ADJUST_CFA_OFFSET(4) + pushl %eax // Pass Method*. + CFI_ADJUST_CFA_OFFSET(4) + + call SYMBOL(artMethodEntryHook) // (Method*, Thread*) + + addl LITERAL(16), %esp // Pop arguments. + CFI_ADJUST_CFA_OFFSET(-16) + + RESTORE_SAVE_EVERYTHING_FRAME + ret +END_FUNCTION art_quick_method_entry_hook + +DEFINE_FUNCTION art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME ebx + + mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %ebx // Remember ArtMethod* + subl LITERAL(8), %esp // Align stack. + CFI_ADJUST_CFA_OFFSET(8) + PUSH_ARG edx // Save gpr return value. edx and eax need to be together + // which isn't the case in kSaveEverything frame. + PUSH_ARG eax + movl %esp, %edx // Get pointer to gpr_result + leal 32(%esp), %eax // Get pointer to fpr_result, in kSaveEverything frame + PUSH_ARG eax // Pass fpr_result + PUSH_ARG edx // Pass gpr_result + PUSH_ARG ebx // Pass ArtMethod* + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current. + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod*, gpr_result*, fpr_result*) + + // Return result could have been changed if it's a reference. 
+ movl 16(%esp), %ecx + movl %ecx, (80+32)(%esp) + addl LITERAL(32), %esp // Pop arguments and grp_result. + CFI_ADJUST_CFA_OFFSET(-32) + + cmpl LITERAL(1), %eax // Check if we returned error. + CFI_REMEMBER_STATE + je .Ldo_deliver_instrumentation_exception_exit + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + ret +.Ldo_deliver_instrumentation_exception_exit: + CFI_RESTORE_STATE_AND_DEF_CFA esp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_FUNCTION art_quick_method_exit_hook + + + diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8c21384c62..136198fe55 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -2208,3 +2208,40 @@ DEFINE_FUNCTION art_quick_compile_optimized RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address ret END_FUNCTION art_quick_compile_optimized + +// On entry, method is at the bottom of the stack. +DEFINE_FUNCTION art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME + + movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + + call SYMBOL(artMethodEntryHook) // (ArtMethod*, Thread*) + + RESTORE_SAVE_EVERYTHING_FRAME + ret +END_FUNCTION art_quick_method_entry_hook + +// On entry, method is at the bottom of the stack. +// and r8 has should_deopt_frame value. +DEFINE_FUNCTION art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME + + leaq 16(%rsp), %rcx // floating-point result pointer in kSaveEverything + // frame + leaq 144(%rsp), %rdx // integer result pointer in kSaveEverything frame + movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod + movq %gs:THREAD_SELF_OFFSET, %rdi // Thread::Current + call SYMBOL(artMethodExitHook) // (Thread*, SP, gpr_res*, fpr_res*) + + cmpq LITERAL(1), %rax + CFI_REMEMBER_STATE + je .Ldo_deliver_instrumentation_exception_exit + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + ret +.Ldo_deliver_instrumentation_exception_exit: + CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_FUNCTION art_quick_method_entry_hook diff --git a/runtime/cha.cc b/runtime/cha.cc index c345af8232..392b35cd81 100644 --- a/runtime/cha.cc +++ b/runtime/cha.cc @@ -219,27 +219,12 @@ class CHAStackVisitor final : public StackVisitor { } // The compiled code on stack is not valid anymore. Need to deoptimize. - SetShouldDeoptimizeFlag(); + SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kCHA); return true; } private: - void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) { - QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo(); - size_t frame_size = frame_info.FrameSizeInBytes(); - uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame()); - size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) * - GetBytesPerGprSpillLocation(kRuntimeISA); - size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) * - GetBytesPerFprSpillLocation(kRuntimeISA); - size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize; - uint8_t* should_deoptimize_addr = sp + offset; - // Set deoptimization flag to 1. - DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1); - *should_deoptimize_addr = 1; - } - // Set of method headers for compiled code that should be deoptimized. 
const std::unordered_set<OatQuickMethodHeader*>& method_headers_; diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h index 5be6f3dab1..c2e6a6585a 100644 --- a/runtime/deoptimization_kind.h +++ b/runtime/deoptimization_kind.h @@ -29,6 +29,7 @@ enum class DeoptimizationKind { kLoopNullBCE, kBlockBCE, kCHA, + kDebugging, kFullFrame, kLast = kFullFrame }; @@ -42,6 +43,7 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null"; case DeoptimizationKind::kBlockBCE: return "block bounds check elimination"; case DeoptimizationKind::kCHA: return "class hierarchy analysis"; + case DeoptimizationKind::kDebugging: return "Deopt requested for debug support"; case DeoptimizationKind::kFullFrame: return "full frame"; } LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind); @@ -50,6 +52,15 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind); +// We use a DeoptimizationStackSlot to record if a deoptimization is required +// for functions that are already on stack. The value in the slot specifies the +// reason we need to deoptimize. +enum class DeoptimizeFlagValue: uint8_t { + kCHA = 0b01, + kDebug = 0b10, + kAll = kCHA | kDebug +}; + } // namespace art #endif // ART_RUNTIME_DEOPTIMIZATION_KIND_H_ diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index f3fc97eca2..3fc23ee66e 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -131,6 +131,10 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp qpoints->pUpdateInlineCache = art_quick_update_inline_cache; qpoints->pCompileOptimized = art_quick_compile_optimized; + // Tracing hooks + qpoints->pMethodEntryHook = art_quick_method_entry_hook; + qpoints->pMethodExitHook = art_quick_method_exit_hook; + bool should_report = false; PaletteShouldReportJniInvocations(&should_report); if (should_report) { diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 5deb55752f..f69ab1d38b 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -208,6 +208,8 @@ V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \ V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \ \ + V(MethodEntryHook, void, ArtMethod*, Thread*) \ + V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*) #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint. diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 3279f7d738..be9d949930 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -60,6 +60,9 @@ namespace art { +extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self); +extern "C" NO_RETURN void artDeoptimize(Thread* self); + // Visits the arguments as saved to the stack by a CalleeSaveType::kRefAndArgs callee save frame. 
class QuickArgumentVisitor { // Number of bytes for each out register in the caller method's frame. @@ -2588,4 +2591,74 @@ extern "C" uint64_t artInvokeCustom(uint32_t call_site_idx, Thread* self, ArtMet return result.GetJ(); } +extern "C" void artMethodEntryHook(ArtMethod* method, Thread* self, ArtMethod** sp ATTRIBUTE_UNUSED) + REQUIRES_SHARED(Locks::mutator_lock_) { + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + instr->MethodEnterEvent(self, method); + if (instr->IsDeoptimized(method)) { + // Instrumentation can request deoptimizing only a particular method (for + // ex: when there are break points on the method). In such cases deoptimize + // only this method. FullFrame deoptimizations are handled on method exits. + artDeoptimizeFromCompiledCode(DeoptimizationKind::kDebugging, self); + } +} + +extern "C" int artMethodExitHook(Thread* self, + ArtMethod* method, + uint64_t* gpr_result, + uint64_t* fpr_result) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current())); + CHECK(gpr_result != nullptr); + CHECK(fpr_result != nullptr); + // Instrumentation exit stub must not be entered with a pending exception. + CHECK(!self->IsExceptionPending()) + << "Enter instrumentation exit stub with pending exception " << self->GetException()->Dump(); + + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + bool is_ref; + JValue return_value = instr->GetReturnValue(self, method, &is_ref, gpr_result, fpr_result); + bool deoptimize = false; + { + StackHandleScope<1> hs(self); + MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); + if (is_ref) { + // Take a handle to the return value so we won't lose it if we suspend. + res.Assign(return_value.GetL()); + } + uint32_t dex_pc = dex::kDexNoIndex; + DCHECK(!method->IsRuntimeMethod()); + instr->MethodExitEvent(self, + ObjPtr<mirror::Object>(), + method, + dex_pc, + /* frame= */ {}, + return_value); + + // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get + // back to an upcall. + NthCallerVisitor visitor(self, 1, true); + visitor.WalkStack(true); + deoptimize = instr->ShouldDeoptimizeMethod(self, visitor); + + if (is_ref) { + // Restore the return value if it's a reference since it might have moved. 
+ *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get(); + } + } + + if (self->IsExceptionPending() || self->ObserveAsyncException()) { + return 1; + } + + if (deoptimize) { + DeoptimizationMethodType deopt_method_type = instr->GetDeoptimizationMethodType(method); + self->PushDeoptimizationContext(return_value, is_ref, nullptr, false, deopt_method_type); + artDeoptimize(self); + UNREACHABLE(); + } + + return 0; +} + } // namespace art diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index d2096ec2f1..c4e62e5b87 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -96,6 +96,8 @@ static inline const void* GetQuickInstrumentationExitPc() { extern "C" void* art_quick_string_builder_append(uint32_t format); extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*); +extern "C" void art_quick_method_entry_hook(ArtMethod*, Thread*); +extern "C" int32_t art_quick_method_exit_hook(Thread*, ArtMethod*, uint64_t*, uint64_t*); } // namespace art diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 73f97bc60b..b515245a12 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -405,9 +405,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow, sizeof(void*)); + EXPECT_OFFSET_DIFFNP( + QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*)); - CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow) - + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all); + CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) == + sizeof(QuickEntryPoints), + QuickEntryPoints_all); } }; diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 97dad8cef7..91c30c7ba7 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -231,6 +231,26 @@ bool Instrumentation::NeedDebugVersionFor(ArtMethod* method) const !method->IsProxyMethod(); } +bool Instrumentation::CodeNeedsEntryExitStub(const void* code) { + // In some tests runtime isn't setup fully and hence the entry points could + // be nullptr. + if (code == nullptr) { + return true; + } + // When jiting code for debuggable apps we generate the code to call method + // entry / exit hooks when required. Hence it is not required to update + // to instrumentation entry point for JITed code in debuggable mode. + if (!Runtime::Current()->IsJavaDebuggable()) { + return true; + } + + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) { + return false; + } + return true; +} + void Instrumentation::InstallStubsForMethod(ArtMethod* method) { if (!method->IsInvokable() || method->IsProxyMethod()) { // Do not change stubs for these methods. @@ -274,7 +294,12 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) { if (entry_exit_stubs_installed_) { // This needs to be checked first since the instrumentation entrypoint will be able to // find the actual JIT compiled code that corresponds to this method. 
- new_quick_code = GetQuickInstrumentationEntryPoint(); + const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize); + if (CodeNeedsEntryExitStub(code)) { + new_quick_code = GetQuickInstrumentationEntryPoint(); + } else { + new_quick_code = code; + } } else if (NeedDebugVersionFor(method)) { // It would be great to search the JIT for its implementation here but we cannot due to // the locks we hold. Instead just set to the interpreter bridge and that code will search @@ -292,23 +317,30 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) { } // Places the instrumentation exit pc as the return PC for every quick frame. This also allows -// deoptimization of quick frames to interpreter frames. +// deoptimization of quick frames to interpreter frames. When force_deopt is +// true the frames have to be deoptimized. If the frame has a deoptimization +// stack slot (all Jited frames), it is set to true to indicate this. For frames +// that do not have this slot, the force_deopt_id on the InstrumentationStack is +// used to check if the frame needs to be deoptimized. When force_deopt is false +// we just instrument the stack for method entry / exit hooks. // Since we may already have done this previously, we need to push new instrumentation frame before // existing instrumentation frames. -void InstrumentationInstallStack(Thread* thread, void* arg) +void InstrumentationInstallStack(Thread* thread, void* arg, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_) { Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); struct InstallStackVisitor final : public StackVisitor { InstallStackVisitor(Thread* thread_in, Context* context, uintptr_t instrumentation_exit_pc, - uint64_t force_deopt_id) + uint64_t force_deopt_id, + bool deopt_all_frames) : StackVisitor(thread_in, context, kInstrumentationStackWalk), instrumentation_stack_(thread_in->GetInstrumentationStack()), instrumentation_exit_pc_(instrumentation_exit_pc), reached_existing_instrumentation_frames_(false), last_return_pc_(0), - force_deopt_id_(force_deopt_id) {} + force_deopt_id_(force_deopt_id), + deopt_all_frames_(deopt_all_frames) {} bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) { ArtMethod* m = GetMethod(); @@ -366,6 +398,15 @@ void InstrumentationInstallStack(Thread* thread, void* arg) LOG(INFO) << "Ignoring already instrumented " << frame.Dump(); } } else { + // If it is a JITed frame then just set the deopt bit if required + // otherwise continue + const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader(); + if (deopt_all_frames_ && + method_header != nullptr && + method_header->HasShouldDeoptimizeFlag()) { + SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kDebug); + return true; + } CHECK_NE(return_pc, 0U); if (UNLIKELY(reached_existing_instrumentation_frames_ && !m->IsRuntimeMethod())) { // We already saw an existing instrumentation frame so this should be a runtime-method @@ -373,9 +414,8 @@ void InstrumentationInstallStack(Thread* thread, void* arg) std::string thread_name; GetThread()->GetThreadName(thread_name); uint32_t dex_pc = dex::kDexNoIndex; - if (last_return_pc_ != 0 && GetCurrentOatQuickMethodHeader() != nullptr) { - dex_pc = GetCurrentOatQuickMethodHeader()->ToDexPc( - GetCurrentQuickFrame(), last_return_pc_); + if (last_return_pc_ != 0 && method_header != nullptr) { + dex_pc = method_header->ToDexPc(GetCurrentQuickFrame(), last_return_pc_); } LOG(FATAL) << "While walking " << thread_name << " found unexpected non-runtime method" << " 
without instrumentation exit return or interpreter frame." @@ -413,6 +453,7 @@ void InstrumentationInstallStack(Thread* thread, void* arg) bool reached_existing_instrumentation_frames_; uintptr_t last_return_pc_; uint64_t force_deopt_id_; + bool deopt_all_frames_; }; if (kVerboseInstrumentation) { std::string thread_name; @@ -423,8 +464,11 @@ void InstrumentationInstallStack(Thread* thread, void* arg) Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg); std::unique_ptr<Context> context(Context::Create()); uintptr_t instrumentation_exit_pc = reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()); - InstallStackVisitor visitor( - thread, context.get(), instrumentation_exit_pc, instrumentation->current_force_deopt_id_); + InstallStackVisitor visitor(thread, + context.get(), + instrumentation_exit_pc, + instrumentation->current_force_deopt_id_, + deopt_all_frames); visitor.WalkStack(true); CHECK_EQ(visitor.dex_pcs_.size(), thread->GetInstrumentationStack()->size()); @@ -449,9 +493,9 @@ void InstrumentationInstallStack(Thread* thread, void* arg) thread->VerifyStack(); } -void Instrumentation::InstrumentThreadStack(Thread* thread) { +void Instrumentation::InstrumentThreadStack(Thread* thread, bool force_deopt) { instrumentation_stubs_installed_ = true; - InstrumentationInstallStack(thread, this); + InstrumentationInstallStack(thread, this, force_deopt); } // Removes the instrumentation exit pc as the return PC for every quick frame. @@ -548,7 +592,7 @@ void Instrumentation::DeoptimizeAllThreadFrames() { ThreadList* tl = Runtime::Current()->GetThreadList(); tl->ForEach([&](Thread* t) { Locks::mutator_lock_->AssertExclusiveHeld(self); - InstrumentThreadStack(t); + InstrumentThreadStack(t, /* deopt_all_frames= */ true); }); current_force_deopt_id_++; } @@ -800,7 +844,9 @@ void Instrumentation::UpdateStubs() { runtime->GetClassLinker()->VisitClasses(&visitor); instrumentation_stubs_installed_ = true; MutexLock mu(self, *Locks::thread_list_lock_); - runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this); + for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) { + InstrumentThreadStack(thread, /* deopt_all_frames= */ false); + } } else { interpreter_stubs_installed_ = false; entry_exit_stubs_installed_ = false; @@ -924,7 +970,8 @@ void Instrumentation::UpdateMethodsCodeImpl(ArtMethod* method, const void* quick // implementation directly and this will confuse the instrumentation trampolines. // TODO We should remove the need for this since it makes it impossible to profile // Proxy.<init> correctly in all cases. - method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init)) { + method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init) && + CodeNeedsEntryExitStub(quick_code)) { new_quick_code = GetQuickInstrumentationEntryPoint(); } else { new_quick_code = quick_code; @@ -1017,7 +1064,12 @@ void Instrumentation::Deoptimize(ArtMethod* method) { // these previously so it will only cover the newly created frames. instrumentation_stubs_installed_ = true; MutexLock mu(self, *Locks::thread_list_lock_); - Runtime::Current()->GetThreadList()->ForEach(InstrumentationInstallStack, this); + for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) { + // This isn't a strong deopt. We deopt this method if it is still in the + // deopt methods list. If by the time we hit this frame we no longer need + // a deopt it is safe to continue. So we don't mark the frame. 
+ InstrumentThreadStack(thread, /* deopt_all_frames= */ false); + } } } @@ -1451,28 +1503,8 @@ static char GetRuntimeMethodShorty(Thread* thread) REQUIRES_SHARED(Locks::mutato return shorty; } -TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, - uintptr_t* return_pc_addr, - uint64_t* gpr_result, - uint64_t* fpr_result) { - DCHECK(gpr_result != nullptr); - DCHECK(fpr_result != nullptr); - // Do the pop. - std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack = - self->GetInstrumentationStack(); - CHECK_GT(stack->size(), 0U); - auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr)); - CHECK(it != stack->end()); - InstrumentationStackFrame instrumentation_frame = it->second; - stack->erase(it); - - // Set return PC and check the consistency of the stack. - // We don't cache the return pc value in a local as it may change after - // sending a method exit event. - *return_pc_addr = instrumentation_frame.return_pc_; - self->VerifyStack(); - - ArtMethod* method = instrumentation_frame.method_; +JValue Instrumentation::GetReturnValue( + Thread* self, ArtMethod* method, bool* is_ref, uint64_t* gpr_result, uint64_t* fpr_result) { uint32_t length; const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); char return_shorty; @@ -1503,9 +1535,7 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0]; } - bool is_ref = return_shorty == '[' || return_shorty == 'L'; - StackHandleScope<1> hs(self); - MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); + *is_ref = return_shorty == '[' || return_shorty == 'L'; JValue return_value; if (return_shorty == 'V') { return_value.SetJ(0); @@ -1514,6 +1544,59 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, } else { return_value.SetJ(*gpr_result); } + return return_value; +} + +bool Instrumentation::ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor) { + bool should_deoptimize_frame = false; + const OatQuickMethodHeader* header = visitor.GetCurrentOatQuickMethodHeader(); + if (header != nullptr && header->HasShouldDeoptimizeFlag()) { + uint8_t should_deopt_flag = visitor.GetShouldDeoptimizeFlag(); + // DeoptimizeFlag could be set for debugging or for CHA invalidations. + // Deoptimize here only if it was requested for debugging. CHA + // invalidations are handled in the JITed code. + if ((should_deopt_flag & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0) { + should_deoptimize_frame = true; + } + } + return (visitor.caller != nullptr) && + (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) || + self->IsForceInterpreter() || + // NB Since structurally obsolete compiled methods might have the offsets of + // methods/fields compiled in we need to go back to interpreter whenever we hit + // them. + visitor.caller->GetDeclaringClass()->IsObsoleteObject() || + Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller) || + should_deoptimize_frame); +} + +TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, + uintptr_t* return_pc_addr, + uint64_t* gpr_result, + uint64_t* fpr_result) { + DCHECK(gpr_result != nullptr); + DCHECK(fpr_result != nullptr); + // Do the pop. 
+ std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack = + self->GetInstrumentationStack(); + CHECK_GT(stack->size(), 0U); + auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr)); + CHECK(it != stack->end()); + InstrumentationStackFrame instrumentation_frame = it->second; + stack->erase(it); + + // Set return PC and check the consistency of the stack. + // We don't cache the return pc value in a local as it may change after + // sending a method exit event. + *return_pc_addr = instrumentation_frame.return_pc_; + self->VerifyStack(); + + ArtMethod* method = instrumentation_frame.method_; + + bool is_ref; + JValue return_value = GetReturnValue(self, method, &is_ref, gpr_result, fpr_result); + StackHandleScope<1> hs(self); + MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); if (is_ref) { // Take a handle to the return value so we won't lose it if we suspend. res.Assign(return_value.GetL()); @@ -1532,17 +1615,11 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, // back to an upcall. NthCallerVisitor visitor(self, 1, true); visitor.WalkStack(true); - bool deoptimize = (visitor.caller != nullptr) && - (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) || - self->IsForceInterpreter() || - // NB Since structurally obsolete compiled methods might have the offsets of - // methods/fields compiled in we need to go back to interpreter whenever we hit - // them. - visitor.caller->GetDeclaringClass()->IsObsoleteObject() || - // Check if we forced all threads to deoptimize in the time between this frame - // being created and now. - instrumentation_frame.force_deopt_id_ != current_force_deopt_id_ || - Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller)); + // Check if we forced all threads to deoptimize in the time between this frame being created and + // now. + bool should_deoptimize_frame = instrumentation_frame.force_deopt_id_ != current_force_deopt_id_; + bool deoptimize = ShouldDeoptimizeMethod(self, visitor) || should_deoptimize_frame; + if (is_ref) { // Restore the return value if it's a reference since it might have moved. 
*reinterpret_cast<mirror::Object**>(gpr_result) = res.Get(); @@ -1560,8 +1637,8 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, } DeoptimizationMethodType deopt_method_type = GetDeoptimizationMethodType(method); self->PushDeoptimizationContext(return_value, - return_shorty == 'L' || return_shorty == '[', - /* exception= */ nullptr , + is_ref, + /* exception= */ nullptr, /* from_code= */ false, deopt_method_type); return GetTwoWordSuccessValue(*return_pc_addr, diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index bdeaf3061c..c49d6728b4 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -17,12 +17,13 @@ #ifndef ART_RUNTIME_INSTRUMENTATION_H_ #define ART_RUNTIME_INSTRUMENTATION_H_ -#include <functional> #include <stdint.h> + +#include <functional> #include <list> #include <memory> -#include <unordered_set> #include <optional> +#include <unordered_set> #include "arch/instruction_set.h" #include "base/enums.h" @@ -30,6 +31,7 @@ #include "base/macros.h" #include "base/safe_map.h" #include "gc_root.h" +#include "offsets.h" namespace art { namespace mirror { @@ -41,6 +43,7 @@ class ArtField; class ArtMethod; template <typename T> class Handle; template <typename T> class MutableHandle; +struct NthCallerVisitor; union JValue; class SHARED_LOCKABLE ReaderWriterMutex; class ShadowFrame; @@ -207,6 +210,10 @@ class Instrumentation { Instrumentation(); + static constexpr MemberOffset NeedsEntryExitHooksOffset() { + return MemberOffset(OFFSETOF_MEMBER(Instrumentation, instrumentation_stubs_installed_)); + } + // Add a listener to be notified of the masked together sent of instrumentation events. This // suspend the runtime to install stubs. You are expected to hold the mutator lock as a proxy // for saying you should have suspended all threads (installing stubs while threads are running @@ -485,6 +492,14 @@ class Instrumentation { void ExceptionHandledEvent(Thread* thread, ObjPtr<mirror::Throwable> exception_object) const REQUIRES_SHARED(Locks::mutator_lock_); + JValue GetReturnValue(Thread* self, + ArtMethod* method, + bool* is_ref, + uint64_t* gpr_result, + uint64_t* fpr_result) REQUIRES_SHARED(Locks::mutator_lock_); + bool ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor) + REQUIRES_SHARED(Locks::mutator_lock_); + // Called when an instrumented method is entered. The intended link register (lr) is saved so // that returning causes a branch to the method exit stub. Generates method enter events. void PushInstrumentationStackFrame(Thread* self, @@ -530,10 +545,13 @@ class Instrumentation { !GetDeoptimizedMethodsLock()); // Install instrumentation exit stub on every method of the stack of the given thread. - // This is used by the debugger to cause a deoptimization of the thread's stack after updating - // local variable(s). - void InstrumentThreadStack(Thread* thread) - REQUIRES(Locks::mutator_lock_); + // This is used by: + // - the debugger to cause a deoptimization of all the frames in the thread's stack (for + // example, after updating local variables) + // - to call method entry / exit hooks for tracing. For this we instrument + // the stack frames to run the entry / exit hooks, but we don't need to deoptimize. + // deopt_all_frames indicates whether the frames need to be deoptimized. + void InstrumentThreadStack(Thread* thread, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_); // Force all currently running frames to be deoptimized back to interpreter.
This should only be // used in cases where basically all compiled code has been invalidated. @@ -557,6 +575,10 @@ class Instrumentation { // False otherwise. bool RequiresInstrumentationInstallation(InstrumentationLevel new_level) const; + // Returns true if we need the entry / exit stub to call the entry / exit hooks. JITed code + // calls the entry / exit hooks directly and doesn't need the stub. + bool CodeNeedsEntryExitStub(const void* code); + // Does the job of installing or removing instrumentation code within methods. // In order to support multiple clients using instrumentation at the same time, // the caller must pass a unique key (a string) identifying it so we remind which @@ -751,7 +773,7 @@ class Instrumentation { friend class InstrumentationTest; // For GetCurrentInstrumentationLevel and ConfigureStubs. friend class InstrumentationStackPopper; // For popping instrumentation frames. - friend void InstrumentationInstallStack(Thread*, void*); + friend void InstrumentationInstallStack(Thread*, void*, bool); DISALLOW_COPY_AND_ASSIGN(Instrumentation); }; diff --git a/runtime/oat.h b/runtime/oat.h index 95eb0e14ed..ac70a7755c 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } }; - // Last oat version changed reason: Inline IRT frame push/pop into JNI stubs. - static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '3', '\0' } }; + // Last oat version changed reason: Introduced new entry points for method entry / exit hooks. + static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '4', '\0' } }; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; static constexpr const char* kDebuggableKey = "debuggable"; diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 5f497af46a..ac5065b2a6 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -599,7 +599,10 @@ void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) { << GetDeoptimizationKindName(kind); DumpFramesWithType(self_, /* details= */ true); } - if (Runtime::Current()->UseJitCompilation()) { + // When deoptimizing for debug support, the optimized code is still valid and + // can be reused when debugging support (like breakpoints) is no longer + // needed for this method.
+ if (Runtime::Current()->UseJitCompilation() && (kind != DeoptimizationKind::kDebugging)) { Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor( deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader()); } else { diff --git a/runtime/stack.cc b/runtime/stack.cc index 233106eb0a..eb0fe5692d 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -800,6 +800,21 @@ QuickMethodFrameInfo StackVisitor::GetCurrentQuickFrameInfo() const { return RuntimeCalleeSaveFrame::GetMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs); } +uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(GetCurrentOatQuickMethodHeader()->HasShouldDeoptimizeFlag()); + QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo(); + size_t frame_size = frame_info.FrameSizeInBytes(); + uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame()); + size_t core_spill_size = + POPCOUNT(frame_info.CoreSpillMask()) * GetBytesPerGprSpillLocation(kRuntimeISA); + size_t fpu_spill_size = + POPCOUNT(frame_info.FpSpillMask()) * GetBytesPerFprSpillLocation(kRuntimeISA); + size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize; + uint8_t* should_deoptimize_addr = sp + offset; + DCHECK_EQ(*should_deoptimize_addr & ~static_cast<uint8_t>(DeoptimizeFlagValue::kAll), 0); + return should_deoptimize_addr; +} + template <StackVisitor::CountTransitions kCount> void StackVisitor::WalkStack(bool include_transitions) { if (check_suspended_) { diff --git a/runtime/stack.h b/runtime/stack.h index 2a6fdc2b35..1b00b54acb 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -17,12 +17,14 @@ #ifndef ART_RUNTIME_STACK_H_ #define ART_RUNTIME_STACK_H_ -#include <optional> #include <stdint.h> + +#include <optional> #include <string> #include "base/locks.h" #include "base/macros.h" +#include "deoptimization_kind.h" #include "obj_ptr.h" #include "quick/quick_method_frame_info.h" #include "stack_map.h" @@ -295,6 +297,15 @@ class StackVisitor { QuickMethodFrameInfo GetCurrentQuickFrameInfo() const REQUIRES_SHARED(Locks::mutator_lock_); + void SetShouldDeoptimizeFlag(DeoptimizeFlagValue value) REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* should_deoptimize_addr = GetShouldDeoptimizeFlagAddr(); + *should_deoptimize_addr = *should_deoptimize_addr | static_cast<uint8_t>(value); + }; + + uint8_t GetShouldDeoptimizeFlag() const REQUIRES_SHARED(Locks::mutator_lock_) { + return *GetShouldDeoptimizeFlagAddr(); + } + private: // Private constructor known in the case that num_frames_ has already been computed. 
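A worked example of the slot arithmetic in GetShouldDeoptimizeFlagAddr above, using made-up frame numbers; the real sizes depend on the method and the ISA, and the 8-byte flag size is only a stand-in for kShouldDeoptimizeFlagSize:

#include <cstddef>

int main() {
  // Made-up layout: 112-byte frame, 6 core spills and 2 FP spills of 8 bytes
  // each, and an 8-byte ShouldDeoptimize slot sitting just below the spills.
  size_t frame_size = 112;
  size_t core_spill_size = 6 * 8;  // 48
  size_t fpu_spill_size = 2 * 8;   // 16
  size_t flag_size = 8;            // stand-in for kShouldDeoptimizeFlagSize
  size_t offset = frame_size - core_spill_size - fpu_spill_size - flag_size;
  // offset == 40, so the slot would sit at SP + 40 in this example frame.
  return offset == 40 ? 0 : 1;
}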
StackVisitor(Thread* thread, @@ -368,6 +379,8 @@ class StackVisitor { mutable std::pair<const OatQuickMethodHeader*, CodeInfo> cur_inline_info_; mutable std::pair<uintptr_t, StackMap> cur_stack_map_; + uint8_t* GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_); + protected: Context* const context_; const bool check_suspended_; diff --git a/runtime/trace.cc b/runtime/trace.cc index 5996a5720b..4082721fcb 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -421,10 +421,11 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in, "Sampling profiler thread"); the_trace_->interval_us_ = interval_us; } else { - runtime->GetInstrumentation()->AddListener(the_trace_, - instrumentation::Instrumentation::kMethodEntered | - instrumentation::Instrumentation::kMethodExited | - instrumentation::Instrumentation::kMethodUnwind); + runtime->GetInstrumentation()->AddListener( + the_trace_, + instrumentation::Instrumentation::kMethodEntered | + instrumentation::Instrumentation::kMethodExited | + instrumentation::Instrumentation::kMethodUnwind); // TODO: In full-PIC mode, we don't need to fully deopt. // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case // we know that inlining and other problematic optimizations are disabled. We might just @@ -480,9 +481,10 @@ void Trace::StopTracing(bool finish_tracing, bool flush_file) { runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr); } else { runtime->GetInstrumentation()->RemoveListener( - the_trace, instrumentation::Instrumentation::kMethodEntered | - instrumentation::Instrumentation::kMethodExited | - instrumentation::Instrumentation::kMethodUnwind); + the_trace, + instrumentation::Instrumentation::kMethodEntered | + instrumentation::Instrumentation::kMethodExited | + instrumentation::Instrumentation::kMethodUnwind); runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey); } } diff --git a/test/2011-stack-walk-concurrent-instrument/src/Main.java b/test/2011-stack-walk-concurrent-instrument/src/Main.java index 8f96f937c9..53a7eea013 100644 --- a/test/2011-stack-walk-concurrent-instrument/src/Main.java +++ b/test/2011-stack-walk-concurrent-instrument/src/Main.java @@ -33,7 +33,7 @@ public class Main { } public native void resetTest(); - public native void waitAndDeopt(Thread t); + public native void waitAndInstrumentStack(Thread t); public native void doSelfStackWalk(); void testConcurrent() throws Exception { @@ -41,7 +41,7 @@ public class Main { final Thread current = Thread.currentThread(); Thread t = new Thread(() -> { try { - this.waitAndDeopt(current); + this.waitAndInstrumentStack(current); } catch (Exception e) { throw new Error("Fail!", e); } diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc index a10fe2e905..5eaaa05dbc 100644 --- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc +++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc @@ -76,7 +76,9 @@ extern "C" JNIEXPORT void JNICALL Java_Main_doSelfStackWalk(JNIEnv*, jobject) { CHECK(sswv.found_g_); CHECK(sswv.found_h_); } -extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobject target) { +extern "C" JNIEXPORT void JNICALL Java_Main_waitAndInstrumentStack(JNIEnv*, + jobject, + jobject target) { while (!instrument_waiting) { } bool timed_out = false; @@ -85,7 +87,8 @@ extern "C" JNIEXPORT void JNICALL 
Java_Main_waitAndDeopt(JNIEnv*, jobject, jobje CHECK(!timed_out); CHECK(other != nullptr); ScopedSuspendAll ssa(__FUNCTION__); - Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other); + Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other, + /* deopt_all_frames= */ false); MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_); bool updated = other->ModifySuspendCount(Thread::Current(), -1, nullptr, SuspendReason::kInternal); CHECK(updated); |
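The test above passes deopt_all_frames = false because it only needs the stack instrumented, not every frame forced back to the interpreter. A minimal sketch of that caller-side choice; the enum and helper below are hypothetical, only the deopt_all_frames parameter itself is from the patch:

// Hypothetical helper, not ART API: maps the reason for instrumenting a stack
// to the deopt_all_frames value a caller would pass.
enum class InstrumentReason { kTracing, kDebugDeopt };

constexpr bool DeoptAllFramesFor(InstrumentReason reason) {
  return reason == InstrumentReason::kDebugDeopt;
}

static_assert(!DeoptAllFramesFor(InstrumentReason::kTracing),
              "tracing only needs the entry / exit hooks to run");
static_assert(DeoptAllFramesFor(InstrumentReason::kDebugDeopt),
              "debugger-driven deoptimization forces every frame");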