35 files changed, 851 insertions, 145 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 74efc9ea8d..d455614cfd 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -822,6 +822,31 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); }; +class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction) + : SlowPathCodeARM64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) { @@ -1113,6 +1138,47 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) { codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } +void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + DataType::Type return_type = method_hook->InputAt(0)->GetType(); + locations->SetInAt(0, ARM64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) { + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + Register value = temps.AcquireW(); + + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ Mov(temp, address + offset); + __ Ldrh(value, MemOperand(temp, 0)); + __ Cbnz(value, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { MacroAssembler* masm = GetVIXLAssembler(); if 
(GetCompilerOptions().CountHotnessInCompiledCode()) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index d4546e5bd5..750151aa24 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -388,6 +388,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateIntRemForConstDenom(HRem *instruction); void GenerateIntRemForPower2Denom(HRem *instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateMethodEntryExitHook(HInstruction* instruction); // Helpers to set up locations for vector memory operations. Returns the memory operand and, // if used, sets the output parameter scratch to a temporary register used in this operand, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 700202ba20..bf0c77da57 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -971,6 +971,31 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL); }; +class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL); +}; + inline vixl32::Condition ARMCondition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -2111,6 +2136,44 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() { } } +void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType())); +} + +void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + + SlowPathCodeARMVIXL* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); + __ Mov(temp, address + offset); + __ Ldrh(temp, MemOperand(temp, 0)); + __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + 
DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(GetVIXLAssembler()); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index b797c30a39..aa40755b29 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -431,6 +431,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateMethodEntryExitHook(HInstruction* instruction); vixl::aarch32::MemOperand VecAddress( HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c49b08ba69..a04b4129a8 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -942,6 +942,30 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); }; +class MethodEntryExitHooksSlowPathX86 : public SlowPathCode { + public: + explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86); +}; + #undef __ // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
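For orientation, the backends all wrap the new slow paths in the same guard: load the flag at a compile-time-constant offset inside the runtime's Instrumentation object and branch to the slow path when it is non-zero (this is what the arm64, arm, and x86_64 hunks emit). The snippet below is only a C++-level sketch of that guard, not code from this change; MaybeRunEntryExitHook is a made-up name, while Runtime, Instrumentation, and NeedsEntryExitHooksOffset are the ART entities used in the hunks.

// Illustrative only: a C++ rendering of what GenerateMethodEntryExitHook()
// emits as machine code for HMethodEntryHook / HMethodExitHook.
inline void MaybeRunEntryExitHook() {
  // Instrumentation is a runtime singleton, so its address plus the fixed
  // member offset can be baked into the JITed code as an immediate.
  uintptr_t base = reinterpret_cast<uintptr_t>(Runtime::Current()->GetInstrumentation());
  int32_t offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
  // Halfword read, matching the Ldrh / cmpw in the hunks above.
  uint16_t flag = *reinterpret_cast<const uint16_t*>(base + offset);
  if (flag != 0) {
    // Slow path (MethodEntryExitHooksSlowPath*): save live registers, invoke
    // the kQuickMethodEntryHook or kQuickMethodExitHook runtime entrypoint,
    // restore live registers, and fall back to the fast path.
  }
}

On the exit side the guard runs while the return value is still live, which is why VisitMethodExitHook pins its single input to the physical return register (ARM64ReturnLocation, SetInForReturnValue, and so on) so the slow path can hand it to the runtime unchanged.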
#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT @@ -1097,6 +1121,70 @@ static dwarf::Reg DWARFReg(Register reg) { return dwarf::Reg::X86Core(static_cast<int>(reg)); } +void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) { + switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + break; + + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX)); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); + break; + + case DataType::Type::kVoid: + locations->SetInAt(0, Location::NoLocation()); + break; + + default: + LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); + } +} + +void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + SetInForReturnValue(method_hook, locations); +} + +void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) { + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ cmpw(Address::Absolute(address + offset), Immediate(0)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { Register reg = EAX; @@ -2408,31 +2496,7 @@ void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNU void LocationsBuilderX86::VisitReturn(HReturn* ret) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RegisterLocation(EAX)); - break; - - case DataType::Type::kInt64: - locations->SetInAt( - 0, Location::RegisterPairLocation(EAX, EDX)); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - 0, 
Location::FpuRegisterLocation(XMM0)); - break; - - default: - LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); - } + SetInForReturnValue(ret, locations); } void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 94f010e598..75c5cebb5e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -344,6 +344,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); + void GenerateMethodEntryExitHook(HInstruction* instruction); + X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dae2ae2b84..4ec2dd7a27 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -965,6 +965,31 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); }; +class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode { + public: + explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction) + : SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const override { + return "MethodEntryExitHooksSlowPath"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64); +}; + #undef __ // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT @@ -1494,6 +1519,68 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } +void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { + new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) { + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); + codegen_->AddSlowPath(slow_path); + + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + __ movq(CpuRegister(TMP), Immediate(address + offset)); + __ cmpw(Address(CpuRegister(TMP), 0), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) { + switch (instr->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RegisterLocation(RAX)); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); + break; + + case DataType::Type::kVoid: + locations->SetInAt(0, Location::NoLocation()); + break; + + default: + LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType(); + } +} + +void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + SetInForReturnValue(method_hook, locations); +} + +void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { NearLabel overflow; @@ -2542,26 +2629,7 @@ void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RegisterLocation(RAX)); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); - break; - - default: - LOG(FATAL) << "Unexpected return 
type " << ret->InputAt(0)->GetType(); - } + SetInForReturnValue(ret, locations); } void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 3e601bb97a..1115c8379d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -276,6 +276,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + void GenerateMethodEntryExitHook(HInstruction* instruction); // Generate a heap reference load using one register `out`: // diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 3abbbae573..c7426828cb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -812,6 +812,11 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HBasicBlock* bb_cursor) { HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetAllocator()) HShouldDeoptimizeFlag(graph_->GetAllocator(), dex_pc); + // ShouldDeoptimizeFlag is used to perform a deoptimization because of a CHA + // invalidation or for debugging reasons. It is OK to just check for non-zero + // value here instead of the specific CHA value. When a debugging deopt is + // requested we deoptimize before we execute any code and hence we shouldn't + // see that case here. HInstruction* compare = new (graph_->GetAllocator()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); HInstruction* deopt = new (graph_->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 390a2bb0be..ed760f190d 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -372,6 +372,9 @@ bool HInstructionBuilder::Build() { if (current_block_->IsEntryBlock()) { InitializeParameters(); AppendInstruction(new (allocator_) HSuspendCheck(0u)); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodEntryHook(0u)); + } AppendInstruction(new (allocator_) HGoto(0u)); continue; } else if (current_block_->IsExitBlock()) { @@ -822,10 +825,18 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, compilation_stats_, MethodCompilationStat::kConstructorFenceGeneratedFinal); } + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + // Return value is not used for void functions. We pass NullConstant to + // avoid special cases when generating code. 
+ AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), dex_pc)); + } AppendInstruction(new (allocator_) HReturnVoid(dex_pc)); } else { DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(value, dex_pc)); + } AppendInstruction(new (allocator_) HReturn(value, dex_pc)); } current_block_ = nullptr; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 17080f0056..24786931f2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2913,7 +2913,10 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } else if (current->IsCurrentMethod()) { replacement = outer_graph->GetCurrentMethod(); } else { - DCHECK(current->IsGoto() || current->IsSuspendCheck()); + // It is OK to ignore MethodEntryHook for inlined functions. + // In debug mode we don't inline and in release mode method + // tracing is best effort so OK to ignore them. + DCHECK(current->IsGoto() || current->IsSuspendCheck() || current->IsMethodEntryHook()); entry_block_->RemoveInstruction(current); } if (replacement != nullptr) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 06fb88e837..978e7c419e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -21,6 +21,7 @@ #include <array> #include <type_traits> +#include "art_method.h" #include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/arena_containers.h" @@ -32,7 +33,6 @@ #include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" -#include "art_method.h" #include "block_namer.h" #include "class_root.h" #include "compilation_kind.h" @@ -680,7 +680,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } bool HasShouldDeoptimizeFlag() const { - return number_of_cha_guards_ != 0; + return number_of_cha_guards_ != 0 || debuggable_; } bool HasTryCatch() const { return has_try_catch_; } @@ -1530,6 +1530,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LongConstant, Constant) \ M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(MethodEntryHook, Instruction) \ + M(MethodExitHook, Instruction) \ M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ @@ -2991,6 +2993,38 @@ class HExpression<0> : public HInstruction { friend class SsaBuilder; }; +class HMethodEntryHook : public HExpression<0> { + public: + explicit HMethodEntryHook(uint32_t dex_pc) + : HExpression(kMethodEntryHook, SideEffects::All(), dex_pc) {} + + bool NeedsEnvironment() const override { + return true; + } + + DECLARE_INSTRUCTION(MethodEntryHook); + + protected: + DEFAULT_COPY_CONSTRUCTOR(MethodEntryHook); +}; + +class HMethodExitHook : public HExpression<1> { + public: + HMethodExitHook(HInstruction* value, uint32_t dex_pc) + : HExpression(kMethodExitHook, SideEffects::All(), dex_pc) { + SetRawInputAt(0, value); + } + + bool NeedsEnvironment() const override { + return true; + } + + DECLARE_INSTRUCTION(MethodExitHook); + + protected: + DEFAULT_COPY_CONSTRUCTOR(MethodExitHook); +}; + // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. 
class HReturnVoid final : public HExpression<0> { diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index 7bcff2bafc..0d7e0e5833 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -505,7 +505,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(64U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(4U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(171 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/openjdkjvmti/deopt_manager.cc b/openjdkjvmti/deopt_manager.cc index bf1b4f0714..cf28a71932 100644 --- a/openjdkjvmti/deopt_manager.cc +++ b/openjdkjvmti/deopt_manager.cc @@ -492,7 +492,12 @@ void DeoptManager::DeoptimizeThread(art::Thread* target) { art::gc::GcCause::kGcCauseDebugger, art::gc::CollectorType::kCollectorTypeDebugger); art::ScopedSuspendAll ssa("Instrument thread stack"); - art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(target); + // Prepare the stack so methods can be deoptimized as and when required. + // This by itself doesn't cause any methods to deoptimize but enables + // deoptimization on demand. + art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack( + target, + /* deopt_all_frames= */ false); } extern DeoptManager* gDeoptManager; diff --git a/openjdkjvmti/ti_heap.cc b/openjdkjvmti/ti_heap.cc index 27fed282aa..bd9d2ddd08 100644 --- a/openjdkjvmti/ti_heap.cc +++ b/openjdkjvmti/ti_heap.cc @@ -1780,7 +1780,7 @@ static void ReplaceStrongRoots(art::Thread* self, const ObjectMap& map) // already have. // TODO We technically only need to do this if the frames are not already being interpreted. // The cost for doing an extra stack walk is unlikely to be worth it though. - instr->InstrumentThreadStack(t); + instr->InstrumentThreadStack(t, /* deopt_all_frames= */ true); } } } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index f5f127472e..5ef1d3e17a 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -2525,3 +2525,36 @@ ENTRY art_quick_compile_optimized // artCompileOptimized doesn't allow thread suspension. blx lr END art_quick_compile_optimized + +// On entry, method is at the bottom of the stack. +ENTRY art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME r0 + ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod + mov r1, rSELF @ pass Thread::Current + bl artMethodEntryHook @ (ArtMethod*, Thread*) + RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + blx lr +END art_quick_method_entry_hook + +ENTRY art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME r2 + + add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame + add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame + ldr r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod* + mov r0, rSELF @ pass Thread::Current + blx artMethodExitHook @ (Thread*, ArtMethod*, gpr_res*, fpr_res*) + + .cfi_remember_state + cbnz r0, .Ldo_deliver_instrumentation_exception_exit @ Deliver exception + + // Normal return. 
+ RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + blx lr +.Ldo_deliver_instrumentation_exception_exit: + .cfi_restore_state + .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END art_quick_method_exit_hook diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 022a0e4053..e5dbeda42d 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -2630,3 +2630,40 @@ ENTRY art_quick_compile_optimized // artCompileOptimized doesn't allow thread suspension. ret END art_quick_compile_optimized + + .extern artMethodEntryHook +ENTRY art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME + + ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod* + mov x1, xSELF // pass Thread::Current + bl artMethodEntryHook // (ArtMethod*, Thread*) + + RESTORE_SAVE_EVERYTHING_FRAME // Note: will restore xSELF + REFRESH_MARKING_REGISTER + ret +END art_quick_method_entry_hook + + .extern artMethodExitHook +ENTRY art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME + + add x3, sp, #16 // floating-point result ptr in kSaveEverything frame + add x2, sp, #272 // integer result ptr in kSaveEverything frame + ldr x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // ArtMethod* + mov x0, xSELF // Thread::Current + bl artMethodExitHook // (Thread*, ArtMethod*, gpr_res*, fpr_res*) + + .cfi_remember_state + cbnz x0, .Ldo_deliver_instrumentation_exception_exit // Handle exception + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + REFRESH_MARKING_REGISTER + ret +.Ldo_deliver_instrumentation_exception_exit: + .cfi_restore_state + .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END art_quick_method_exit_hook + diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index cda98d2921..2f6af4f5de 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -2381,3 +2381,62 @@ DEFINE_FUNCTION art_quick_compile_optimized RESTORE_SAVE_EVERYTHING_FRAME ret END_FUNCTION art_quick_compile_optimized + +DEFINE_FUNCTION art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME edx + mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod + subl LITERAL(8), %esp + CFI_ADJUST_CFA_OFFSET(8) + + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). + CFI_ADJUST_CFA_OFFSET(4) + pushl %eax // Pass Method*. + CFI_ADJUST_CFA_OFFSET(4) + + call SYMBOL(artMethodEntryHook) // (Method*, Thread*) + + addl LITERAL(16), %esp // Pop arguments. + CFI_ADJUST_CFA_OFFSET(-16) + + RESTORE_SAVE_EVERYTHING_FRAME + ret +END_FUNCTION art_quick_method_entry_hook + +DEFINE_FUNCTION art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME ebx + + mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %ebx // Remember ArtMethod* + subl LITERAL(8), %esp // Align stack. + CFI_ADJUST_CFA_OFFSET(8) + PUSH_ARG edx // Save gpr return value. edx and eax need to be together + // which isn't the case in kSaveEverything frame. + PUSH_ARG eax + movl %esp, %edx // Get pointer to gpr_result + leal 32(%esp), %eax // Get pointer to fpr_result, in kSaveEverything frame + PUSH_ARG eax // Pass fpr_result + PUSH_ARG edx // Pass gpr_result + PUSH_ARG ebx // Pass ArtMethod* + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current. + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod*, gpr_result*, fpr_result*) + + // Return result could have been changed if it's a reference. 
+ movl 16(%esp), %ecx + movl %ecx, (80+32)(%esp) + addl LITERAL(32), %esp // Pop arguments and grp_result. + CFI_ADJUST_CFA_OFFSET(-32) + + cmpl LITERAL(1), %eax // Check if we returned error. + CFI_REMEMBER_STATE + je .Ldo_deliver_instrumentation_exception_exit + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + ret +.Ldo_deliver_instrumentation_exception_exit: + CFI_RESTORE_STATE_AND_DEF_CFA esp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_FUNCTION art_quick_method_exit_hook + + + diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8c21384c62..136198fe55 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -2208,3 +2208,40 @@ DEFINE_FUNCTION art_quick_compile_optimized RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address ret END_FUNCTION art_quick_compile_optimized + +// On entry, method is at the bottom of the stack. +DEFINE_FUNCTION art_quick_method_entry_hook + SETUP_SAVE_EVERYTHING_FRAME + + movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + + call SYMBOL(artMethodEntryHook) // (ArtMethod*, Thread*) + + RESTORE_SAVE_EVERYTHING_FRAME + ret +END_FUNCTION art_quick_method_entry_hook + +// On entry, method is at the bottom of the stack. +// and r8 has should_deopt_frame value. +DEFINE_FUNCTION art_quick_method_exit_hook + SETUP_SAVE_EVERYTHING_FRAME + + leaq 16(%rsp), %rcx // floating-point result pointer in kSaveEverything + // frame + leaq 144(%rsp), %rdx // integer result pointer in kSaveEverything frame + movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod + movq %gs:THREAD_SELF_OFFSET, %rdi // Thread::Current + call SYMBOL(artMethodExitHook) // (Thread*, SP, gpr_res*, fpr_res*) + + cmpq LITERAL(1), %rax + CFI_REMEMBER_STATE + je .Ldo_deliver_instrumentation_exception_exit + + // Normal return. + RESTORE_SAVE_EVERYTHING_FRAME + ret +.Ldo_deliver_instrumentation_exception_exit: + CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_FUNCTION art_quick_method_entry_hook diff --git a/runtime/cha.cc b/runtime/cha.cc index c345af8232..392b35cd81 100644 --- a/runtime/cha.cc +++ b/runtime/cha.cc @@ -219,27 +219,12 @@ class CHAStackVisitor final : public StackVisitor { } // The compiled code on stack is not valid anymore. Need to deoptimize. - SetShouldDeoptimizeFlag(); + SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kCHA); return true; } private: - void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) { - QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo(); - size_t frame_size = frame_info.FrameSizeInBytes(); - uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame()); - size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) * - GetBytesPerGprSpillLocation(kRuntimeISA); - size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) * - GetBytesPerFprSpillLocation(kRuntimeISA); - size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize; - uint8_t* should_deoptimize_addr = sp + offset; - // Set deoptimization flag to 1. - DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1); - *should_deoptimize_addr = 1; - } - // Set of method headers for compiled code that should be deoptimized. 
const std::unordered_set<OatQuickMethodHeader*>& method_headers_; diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h index 5be6f3dab1..c2e6a6585a 100644 --- a/runtime/deoptimization_kind.h +++ b/runtime/deoptimization_kind.h @@ -29,6 +29,7 @@ enum class DeoptimizationKind { kLoopNullBCE, kBlockBCE, kCHA, + kDebugging, kFullFrame, kLast = kFullFrame }; @@ -42,6 +43,7 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null"; case DeoptimizationKind::kBlockBCE: return "block bounds check elimination"; case DeoptimizationKind::kCHA: return "class hierarchy analysis"; + case DeoptimizationKind::kDebugging: return "Deopt requested for debug support"; case DeoptimizationKind::kFullFrame: return "full frame"; } LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind); @@ -50,6 +52,15 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind); +// We use a DeoptimizationStackSlot to record if a deoptimization is required +// for functions that are already on stack. The value in the slot specifies the +// reason we need to deoptimize. +enum class DeoptimizeFlagValue: uint8_t { + kCHA = 0b01, + kDebug = 0b10, + kAll = kCHA | kDebug +}; + } // namespace art #endif // ART_RUNTIME_DEOPTIMIZATION_KIND_H_ diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index f3fc97eca2..3fc23ee66e 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -131,6 +131,10 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp qpoints->pUpdateInlineCache = art_quick_update_inline_cache; qpoints->pCompileOptimized = art_quick_compile_optimized; + // Tracing hooks + qpoints->pMethodEntryHook = art_quick_method_entry_hook; + qpoints->pMethodExitHook = art_quick_method_exit_hook; + bool should_report = false; PaletteShouldReportJniInvocations(&should_report); if (should_report) { diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 5deb55752f..f69ab1d38b 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -208,6 +208,8 @@ V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \ V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \ \ + V(MethodEntryHook, void, ArtMethod*, Thread*) \ + V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*) #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint. diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 3279f7d738..be9d949930 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -60,6 +60,9 @@ namespace art { +extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self); +extern "C" NO_RETURN void artDeoptimize(Thread* self); + // Visits the arguments as saved to the stack by a CalleeSaveType::kRefAndArgs callee save frame. 
class QuickArgumentVisitor { // Number of bytes for each out register in the caller method's frame. @@ -2588,4 +2591,74 @@ extern "C" uint64_t artInvokeCustom(uint32_t call_site_idx, Thread* self, ArtMet return result.GetJ(); } +extern "C" void artMethodEntryHook(ArtMethod* method, Thread* self, ArtMethod** sp ATTRIBUTE_UNUSED) + REQUIRES_SHARED(Locks::mutator_lock_) { + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + instr->MethodEnterEvent(self, method); + if (instr->IsDeoptimized(method)) { + // Instrumentation can request deoptimizing only a particular method (for + // ex: when there are break points on the method). In such cases deoptimize + // only this method. FullFrame deoptimizations are handled on method exits. + artDeoptimizeFromCompiledCode(DeoptimizationKind::kDebugging, self); + } +} + +extern "C" int artMethodExitHook(Thread* self, + ArtMethod* method, + uint64_t* gpr_result, + uint64_t* fpr_result) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current())); + CHECK(gpr_result != nullptr); + CHECK(fpr_result != nullptr); + // Instrumentation exit stub must not be entered with a pending exception. + CHECK(!self->IsExceptionPending()) + << "Enter instrumentation exit stub with pending exception " << self->GetException()->Dump(); + + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + bool is_ref; + JValue return_value = instr->GetReturnValue(self, method, &is_ref, gpr_result, fpr_result); + bool deoptimize = false; + { + StackHandleScope<1> hs(self); + MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); + if (is_ref) { + // Take a handle to the return value so we won't lose it if we suspend. + res.Assign(return_value.GetL()); + } + uint32_t dex_pc = dex::kDexNoIndex; + DCHECK(!method->IsRuntimeMethod()); + instr->MethodExitEvent(self, + ObjPtr<mirror::Object>(), + method, + dex_pc, + /* frame= */ {}, + return_value); + + // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get + // back to an upcall. + NthCallerVisitor visitor(self, 1, true); + visitor.WalkStack(true); + deoptimize = instr->ShouldDeoptimizeMethod(self, visitor); + + if (is_ref) { + // Restore the return value if it's a reference since it might have moved. 
+ *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get(); + } + } + + if (self->IsExceptionPending() || self->ObserveAsyncException()) { + return 1; + } + + if (deoptimize) { + DeoptimizationMethodType deopt_method_type = instr->GetDeoptimizationMethodType(method); + self->PushDeoptimizationContext(return_value, is_ref, nullptr, false, deopt_method_type); + artDeoptimize(self); + UNREACHABLE(); + } + + return 0; +} + } // namespace art diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index d2096ec2f1..c4e62e5b87 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -96,6 +96,8 @@ static inline const void* GetQuickInstrumentationExitPc() { extern "C" void* art_quick_string_builder_append(uint32_t format); extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*); +extern "C" void art_quick_method_entry_hook(ArtMethod*, Thread*); +extern "C" int32_t art_quick_method_exit_hook(Thread*, ArtMethod*, uint64_t*, uint64_t*); } // namespace art diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 73f97bc60b..b515245a12 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -405,9 +405,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow, sizeof(void*)); + EXPECT_OFFSET_DIFFNP( + QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*)); - CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow) - + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all); + CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) == + sizeof(QuickEntryPoints), + QuickEntryPoints_all); } }; diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 97dad8cef7..91c30c7ba7 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -231,6 +231,26 @@ bool Instrumentation::NeedDebugVersionFor(ArtMethod* method) const !method->IsProxyMethod(); } +bool Instrumentation::CodeNeedsEntryExitStub(const void* code) { + // In some tests runtime isn't setup fully and hence the entry points could + // be nullptr. + if (code == nullptr) { + return true; + } + // When jiting code for debuggable apps we generate the code to call method + // entry / exit hooks when required. Hence it is not required to update + // to instrumentation entry point for JITed code in debuggable mode. + if (!Runtime::Current()->IsJavaDebuggable()) { + return true; + } + + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) { + return false; + } + return true; +} + void Instrumentation::InstallStubsForMethod(ArtMethod* method) { if (!method->IsInvokable() || method->IsProxyMethod()) { // Do not change stubs for these methods. @@ -274,7 +294,12 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) { if (entry_exit_stubs_installed_) { // This needs to be checked first since the instrumentation entrypoint will be able to // find the actual JIT compiled code that corresponds to this method. 
- new_quick_code = GetQuickInstrumentationEntryPoint(); + const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize); + if (CodeNeedsEntryExitStub(code)) { + new_quick_code = GetQuickInstrumentationEntryPoint(); + } else { + new_quick_code = code; + } } else if (NeedDebugVersionFor(method)) { // It would be great to search the JIT for its implementation here but we cannot due to // the locks we hold. Instead just set to the interpreter bridge and that code will search @@ -292,23 +317,30 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) { } // Places the instrumentation exit pc as the return PC for every quick frame. This also allows -// deoptimization of quick frames to interpreter frames. +// deoptimization of quick frames to interpreter frames. When force_deopt is +// true the frames have to be deoptimized. If the frame has a deoptimization +// stack slot (all Jited frames), it is set to true to indicate this. For frames +// that do not have this slot, the force_deopt_id on the InstrumentationStack is +// used to check if the frame needs to be deoptimized. When force_deopt is false +// we just instrument the stack for method entry / exit hooks. // Since we may already have done this previously, we need to push new instrumentation frame before // existing instrumentation frames. -void InstrumentationInstallStack(Thread* thread, void* arg) +void InstrumentationInstallStack(Thread* thread, void* arg, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_) { Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); struct InstallStackVisitor final : public StackVisitor { InstallStackVisitor(Thread* thread_in, Context* context, uintptr_t instrumentation_exit_pc, - uint64_t force_deopt_id) + uint64_t force_deopt_id, + bool deopt_all_frames) : StackVisitor(thread_in, context, kInstrumentationStackWalk), instrumentation_stack_(thread_in->GetInstrumentationStack()), instrumentation_exit_pc_(instrumentation_exit_pc), reached_existing_instrumentation_frames_(false), last_return_pc_(0), - force_deopt_id_(force_deopt_id) {} + force_deopt_id_(force_deopt_id), + deopt_all_frames_(deopt_all_frames) {} bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) { ArtMethod* m = GetMethod(); @@ -366,6 +398,15 @@ void InstrumentationInstallStack(Thread* thread, void* arg) LOG(INFO) << "Ignoring already instrumented " << frame.Dump(); } } else { + // If it is a JITed frame then just set the deopt bit if required + // otherwise continue + const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader(); + if (deopt_all_frames_ && + method_header != nullptr && + method_header->HasShouldDeoptimizeFlag()) { + SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kDebug); + return true; + } CHECK_NE(return_pc, 0U); if (UNLIKELY(reached_existing_instrumentation_frames_ && !m->IsRuntimeMethod())) { // We already saw an existing instrumentation frame so this should be a runtime-method @@ -373,9 +414,8 @@ void InstrumentationInstallStack(Thread* thread, void* arg) std::string thread_name; GetThread()->GetThreadName(thread_name); uint32_t dex_pc = dex::kDexNoIndex; - if (last_return_pc_ != 0 && GetCurrentOatQuickMethodHeader() != nullptr) { - dex_pc = GetCurrentOatQuickMethodHeader()->ToDexPc( - GetCurrentQuickFrame(), last_return_pc_); + if (last_return_pc_ != 0 && method_header != nullptr) { + dex_pc = method_header->ToDexPc(GetCurrentQuickFrame(), last_return_pc_); } LOG(FATAL) << "While walking " << thread_name << " found unexpected non-runtime method" << " 
without instrumentation exit return or interpreter frame." @@ -413,6 +453,7 @@ void InstrumentationInstallStack(Thread* thread, void* arg) bool reached_existing_instrumentation_frames_; uintptr_t last_return_pc_; uint64_t force_deopt_id_; + bool deopt_all_frames_; }; if (kVerboseInstrumentation) { std::string thread_name; @@ -423,8 +464,11 @@ void InstrumentationInstallStack(Thread* thread, void* arg) Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg); std::unique_ptr<Context> context(Context::Create()); uintptr_t instrumentation_exit_pc = reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()); - InstallStackVisitor visitor( - thread, context.get(), instrumentation_exit_pc, instrumentation->current_force_deopt_id_); + InstallStackVisitor visitor(thread, + context.get(), + instrumentation_exit_pc, + instrumentation->current_force_deopt_id_, + deopt_all_frames); visitor.WalkStack(true); CHECK_EQ(visitor.dex_pcs_.size(), thread->GetInstrumentationStack()->size()); @@ -449,9 +493,9 @@ void InstrumentationInstallStack(Thread* thread, void* arg) thread->VerifyStack(); } -void Instrumentation::InstrumentThreadStack(Thread* thread) { +void Instrumentation::InstrumentThreadStack(Thread* thread, bool force_deopt) { instrumentation_stubs_installed_ = true; - InstrumentationInstallStack(thread, this); + InstrumentationInstallStack(thread, this, force_deopt); } // Removes the instrumentation exit pc as the return PC for every quick frame. @@ -548,7 +592,7 @@ void Instrumentation::DeoptimizeAllThreadFrames() { ThreadList* tl = Runtime::Current()->GetThreadList(); tl->ForEach([&](Thread* t) { Locks::mutator_lock_->AssertExclusiveHeld(self); - InstrumentThreadStack(t); + InstrumentThreadStack(t, /* deopt_all_frames= */ true); }); current_force_deopt_id_++; } @@ -800,7 +844,9 @@ void Instrumentation::UpdateStubs() { runtime->GetClassLinker()->VisitClasses(&visitor); instrumentation_stubs_installed_ = true; MutexLock mu(self, *Locks::thread_list_lock_); - runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this); + for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) { + InstrumentThreadStack(thread, /* deopt_all_frames= */ false); + } } else { interpreter_stubs_installed_ = false; entry_exit_stubs_installed_ = false; @@ -924,7 +970,8 @@ void Instrumentation::UpdateMethodsCodeImpl(ArtMethod* method, const void* quick // implementation directly and this will confuse the instrumentation trampolines. // TODO We should remove the need for this since it makes it impossible to profile // Proxy.<init> correctly in all cases. - method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init)) { + method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init) && + CodeNeedsEntryExitStub(quick_code)) { new_quick_code = GetQuickInstrumentationEntryPoint(); } else { new_quick_code = quick_code; @@ -1017,7 +1064,12 @@ void Instrumentation::Deoptimize(ArtMethod* method) { // these previously so it will only cover the newly created frames. instrumentation_stubs_installed_ = true; MutexLock mu(self, *Locks::thread_list_lock_); - Runtime::Current()->GetThreadList()->ForEach(InstrumentationInstallStack, this); + for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) { + // This isn't a strong deopt. We deopt this method if it is still in the + // deopt methods list. If by the time we hit this frame we no longer need + // a deopt it is safe to continue. So we don't mark the frame. 
+ InstrumentThreadStack(thread, /* deopt_all_frames= */ false); + } } } @@ -1451,28 +1503,8 @@ static char GetRuntimeMethodShorty(Thread* thread) REQUIRES_SHARED(Locks::mutato return shorty; } -TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, - uintptr_t* return_pc_addr, - uint64_t* gpr_result, - uint64_t* fpr_result) { - DCHECK(gpr_result != nullptr); - DCHECK(fpr_result != nullptr); - // Do the pop. - std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack = - self->GetInstrumentationStack(); - CHECK_GT(stack->size(), 0U); - auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr)); - CHECK(it != stack->end()); - InstrumentationStackFrame instrumentation_frame = it->second; - stack->erase(it); - - // Set return PC and check the consistency of the stack. - // We don't cache the return pc value in a local as it may change after - // sending a method exit event. - *return_pc_addr = instrumentation_frame.return_pc_; - self->VerifyStack(); - - ArtMethod* method = instrumentation_frame.method_; +JValue Instrumentation::GetReturnValue( + Thread* self, ArtMethod* method, bool* is_ref, uint64_t* gpr_result, uint64_t* fpr_result) { uint32_t length; const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); char return_shorty; @@ -1503,9 +1535,7 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0]; } - bool is_ref = return_shorty == '[' || return_shorty == 'L'; - StackHandleScope<1> hs(self); - MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); + *is_ref = return_shorty == '[' || return_shorty == 'L'; JValue return_value; if (return_shorty == 'V') { return_value.SetJ(0); @@ -1514,6 +1544,59 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, } else { return_value.SetJ(*gpr_result); } + return return_value; +} + +bool Instrumentation::ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor) { + bool should_deoptimize_frame = false; + const OatQuickMethodHeader* header = visitor.GetCurrentOatQuickMethodHeader(); + if (header != nullptr && header->HasShouldDeoptimizeFlag()) { + uint8_t should_deopt_flag = visitor.GetShouldDeoptimizeFlag(); + // DeoptimizeFlag could be set for debugging or for CHA invalidations. + // Deoptimize here only if it was requested for debugging. CHA + // invalidations are handled in the JITed code. + if ((should_deopt_flag & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0) { + should_deoptimize_frame = true; + } + } + return (visitor.caller != nullptr) && + (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) || + self->IsForceInterpreter() || + // NB Since structurally obsolete compiled methods might have the offsets of + // methods/fields compiled in we need to go back to interpreter whenever we hit + // them. + visitor.caller->GetDeclaringClass()->IsObsoleteObject() || + Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller) || + should_deoptimize_frame); +} + +TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, + uintptr_t* return_pc_addr, + uint64_t* gpr_result, + uint64_t* fpr_result) { + DCHECK(gpr_result != nullptr); + DCHECK(fpr_result != nullptr); + // Do the pop. 
+ std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack = + self->GetInstrumentationStack(); + CHECK_GT(stack->size(), 0U); + auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr)); + CHECK(it != stack->end()); + InstrumentationStackFrame instrumentation_frame = it->second; + stack->erase(it); + + // Set return PC and check the consistency of the stack. + // We don't cache the return pc value in a local as it may change after + // sending a method exit event. + *return_pc_addr = instrumentation_frame.return_pc_; + self->VerifyStack(); + + ArtMethod* method = instrumentation_frame.method_; + + bool is_ref; + JValue return_value = GetReturnValue(self, method, &is_ref, gpr_result, fpr_result); + StackHandleScope<1> hs(self); + MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr)); if (is_ref) { // Take a handle to the return value so we won't lose it if we suspend. res.Assign(return_value.GetL()); @@ -1532,17 +1615,11 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, // back to an upcall. NthCallerVisitor visitor(self, 1, true); visitor.WalkStack(true); - bool deoptimize = (visitor.caller != nullptr) && - (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) || - self->IsForceInterpreter() || - // NB Since structurally obsolete compiled methods might have the offsets of - // methods/fields compiled in we need to go back to interpreter whenever we hit - // them. - visitor.caller->GetDeclaringClass()->IsObsoleteObject() || - // Check if we forced all threads to deoptimize in the time between this frame - // being created and now. - instrumentation_frame.force_deopt_id_ != current_force_deopt_id_ || - Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller)); + // Check if we forced all threads to deoptimize in the time between this frame being created and + // now. + bool should_deoptimize_frame = instrumentation_frame.force_deopt_id_ != current_force_deopt_id_; + bool deoptimize = ShouldDeoptimizeMethod(self, visitor) || should_deoptimize_frame; + if (is_ref) { // Restore the return value if it's a reference since it might have moved. 
     *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
@@ -1560,8 +1637,8 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
   }
   DeoptimizationMethodType deopt_method_type = GetDeoptimizationMethodType(method);
   self->PushDeoptimizationContext(return_value,
-                                  return_shorty == 'L' || return_shorty == '[',
-                                  /* exception= */ nullptr ,
+                                  is_ref,
+                                  /* exception= */ nullptr,
                                   /* from_code= */ false,
                                   deopt_method_type);
   return GetTwoWordSuccessValue(*return_pc_addr,
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index bdeaf3061c..c49d6728b4 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,12 +17,13 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include <functional>
 #include <stdint.h>
+
+#include <functional>
 #include <list>
 #include <memory>
-#include <unordered_set>
 #include <optional>
+#include <unordered_set>
 
 #include "arch/instruction_set.h"
 #include "base/enums.h"
@@ -30,6 +31,7 @@
 #include "base/macros.h"
 #include "base/safe_map.h"
 #include "gc_root.h"
+#include "offsets.h"
 
 namespace art {
 namespace mirror {
@@ -41,6 +43,7 @@ class ArtField;
 class ArtMethod;
 template <typename T> class Handle;
 template <typename T> class MutableHandle;
+struct NthCallerVisitor;
 union JValue;
 class SHARED_LOCKABLE ReaderWriterMutex;
 class ShadowFrame;
@@ -207,6 +210,10 @@ class Instrumentation {
 
   Instrumentation();
 
+  static constexpr MemberOffset NeedsEntryExitHooksOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Instrumentation, instrumentation_stubs_installed_));
+  }
+
   // Add a listener to be notified of the masked together sent of instrumentation events. This
   // suspend the runtime to install stubs. You are expected to hold the mutator lock as a proxy
   // for saying you should have suspended all threads (installing stubs while threads are running
@@ -485,6 +492,14 @@ class Instrumentation {
   void ExceptionHandledEvent(Thread* thread, ObjPtr<mirror::Throwable> exception_object) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  JValue GetReturnValue(Thread* self,
+                        ArtMethod* method,
+                        bool* is_ref,
+                        uint64_t* gpr_result,
+                        uint64_t* fpr_result) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Called when an instrumented method is entered. The intended link register (lr) is saved so
   // that returning causes a branch to the method exit stub. Generates method enter events.
   void PushInstrumentationStackFrame(Thread* self,
@@ -530,10 +545,13 @@ class Instrumentation {
                      !GetDeoptimizedMethodsLock());
 
   // Install instrumentation exit stub on every method of the stack of the given thread.
-  // This is used by the debugger to cause a deoptimization of the thread's stack after updating
-  // local variable(s).
-  void InstrumentThreadStack(Thread* thread)
-      REQUIRES(Locks::mutator_lock_);
+  // This is used by:
+  //  - the debugger to cause a deoptimization of all the frames in the thread's stack (for
+  //    example, after updating local variables)
+  //  - to call method entry / exit hooks for tracing. For this we instrument
+  //    the stack frame to run entry / exit hooks but we don't need to deoptimize.
+  // deopt_all_frames indicates whether the frames need to deoptimize or not.
+  void InstrumentThreadStack(Thread* thread, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_);
 
   // Force all currently running frames to be deoptimized back to interpreter. This should only be
   // used in cases where basically all compiled code has been invalidated.
@@ -557,6 +575,10 @@
   // False otherwise.
   bool RequiresInstrumentationInstallation(InstrumentationLevel new_level) const;
 
+  // Returns true if we need the entry / exit stub to call the entry hooks. JITed code
+  // directly calls the entry / exit hooks and doesn't need the stub.
+  bool CodeNeedsEntryExitStub(const void* code);
+
   // Does the job of installing or removing instrumentation code within methods.
   // In order to support multiple clients using instrumentation at the same time,
   // the caller must pass a unique key (a string) identifying it so we remind which
@@ -751,7 +773,7 @@ class Instrumentation {
   friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
   friend class InstrumentationStackPopper;  // For popping instrumentation frames.
-  friend void InstrumentationInstallStack(Thread*, void*);
+  friend void InstrumentationInstallStack(Thread*, void*, bool);
 
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
diff --git a/runtime/oat.h b/runtime/oat.h
index 95eb0e14ed..ac70a7755c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: Inline IRT frame push/pop into JNI stubs.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '3', '\0' } };
+  // Last oat version changed reason: Introduced new entry points for method entry / exit hooks.
+  static constexpr std::array<uint8_t, 4> kOatVersion{ {'2', '0', '4', '\0'} };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5f497af46a..ac5065b2a6 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -599,7 +599,10 @@ void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
         << GetDeoptimizationKindName(kind);
     DumpFramesWithType(self_, /* details= */ true);
   }
-  if (Runtime::Current()->UseJitCompilation()) {
+  // When deoptimizing for debug support the optimized code is still valid and
+  // can be reused when debugging support (like breakpoints) is no longer
+  // needed for this method.
+  if (Runtime::Current()->UseJitCompilation() && (kind != DeoptimizationKind::kDebugging)) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
   } else {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 233106eb0a..eb0fe5692d 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -800,6 +800,21 @@ QuickMethodFrameInfo StackVisitor::GetCurrentQuickFrameInfo() const {
   return RuntimeCalleeSaveFrame::GetMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
 }
 
+uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(GetCurrentOatQuickMethodHeader()->HasShouldDeoptimizeFlag());
+  QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+  size_t frame_size = frame_info.FrameSizeInBytes();
+  uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+  size_t core_spill_size =
+      POPCOUNT(frame_info.CoreSpillMask()) * GetBytesPerGprSpillLocation(kRuntimeISA);
+  size_t fpu_spill_size =
+      POPCOUNT(frame_info.FpSpillMask()) * GetBytesPerFprSpillLocation(kRuntimeISA);
+  size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+  uint8_t* should_deoptimize_addr = sp + offset;
+  DCHECK_EQ(*should_deoptimize_addr & ~static_cast<uint8_t>(DeoptimizeFlagValue::kAll), 0);
+  return should_deoptimize_addr;
+}
+
 template <StackVisitor::CountTransitions kCount>
 void StackVisitor::WalkStack(bool include_transitions) {
   if (check_suspended_) {
diff --git a/runtime/stack.h b/runtime/stack.h
index 2a6fdc2b35..1b00b54acb 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -17,12 +17,14 @@
 #ifndef ART_RUNTIME_STACK_H_
 #define ART_RUNTIME_STACK_H_
 
-#include <optional>
 #include <stdint.h>
+
+#include <optional>
 #include <string>
 
 #include "base/locks.h"
 #include "base/macros.h"
+#include "deoptimization_kind.h"
 #include "obj_ptr.h"
 #include "quick/quick_method_frame_info.h"
 #include "stack_map.h"
@@ -295,6 +297,15 @@ class StackVisitor {
   QuickMethodFrameInfo GetCurrentQuickFrameInfo() const REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void SetShouldDeoptimizeFlag(DeoptimizeFlagValue value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    uint8_t* should_deoptimize_addr = GetShouldDeoptimizeFlagAddr();
+    *should_deoptimize_addr = *should_deoptimize_addr | static_cast<uint8_t>(value);
+  };
+
+  uint8_t GetShouldDeoptimizeFlag() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return *GetShouldDeoptimizeFlagAddr();
+  }
+
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
   StackVisitor(Thread* thread,
@@ -368,6 +379,8 @@ class StackVisitor {
   mutable std::pair<const OatQuickMethodHeader*, CodeInfo> cur_inline_info_;
   mutable std::pair<uintptr_t, StackMap> cur_stack_map_;
 
+  uint8_t* GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   Context* const context_;
   const bool check_suspended_;
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5996a5720b..4082721fcb 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -421,10 +421,11 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
                                             "Sampling profiler thread");
     the_trace_->interval_us_ = interval_us;
   } else {
-    runtime->GetInstrumentation()->AddListener(the_trace_,
-                                               instrumentation::Instrumentation::kMethodEntered |
-                                               instrumentation::Instrumentation::kMethodExited |
-                                               instrumentation::Instrumentation::kMethodUnwind);
+    runtime->GetInstrumentation()->AddListener(
+        the_trace_,
+        instrumentation::Instrumentation::kMethodEntered |
+            instrumentation::Instrumentation::kMethodExited |
+            instrumentation::Instrumentation::kMethodUnwind);
     // TODO: In full-PIC mode, we don't need to fully deopt.
     // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case
     // we know that inlining and other problematic optimizations are disabled. We might just
@@ -480,9 +481,10 @@ void Trace::StopTracing(bool finish_tracing, bool flush_file) {
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
       runtime->GetInstrumentation()->RemoveListener(
-          the_trace, instrumentation::Instrumentation::kMethodEntered |
-                     instrumentation::Instrumentation::kMethodExited |
-                     instrumentation::Instrumentation::kMethodUnwind);
+          the_trace,
+          instrumentation::Instrumentation::kMethodEntered |
+              instrumentation::Instrumentation::kMethodExited |
+              instrumentation::Instrumentation::kMethodUnwind);
       runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
     }
   }
diff --git a/test/2011-stack-walk-concurrent-instrument/src/Main.java b/test/2011-stack-walk-concurrent-instrument/src/Main.java
index 8f96f937c9..53a7eea013 100644
--- a/test/2011-stack-walk-concurrent-instrument/src/Main.java
+++ b/test/2011-stack-walk-concurrent-instrument/src/Main.java
@@ -33,7 +33,7 @@ public class Main {
   }
 
   public native void resetTest();
-  public native void waitAndDeopt(Thread t);
+  public native void waitAndInstrumentStack(Thread t);
   public native void doSelfStackWalk();
 
   void testConcurrent() throws Exception {
@@ -41,7 +41,7 @@ public class Main {
     final Thread current = Thread.currentThread();
    Thread t = new Thread(() -> {
       try {
-        this.waitAndDeopt(current);
+        this.waitAndInstrumentStack(current);
       } catch (Exception e) {
         throw new Error("Fail!", e);
       }
diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
index a10fe2e905..5eaaa05dbc 100644
--- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
+++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
@@ -76,7 +76,9 @@ extern "C" JNIEXPORT void JNICALL Java_Main_doSelfStackWalk(JNIEnv*, jobject) {
   CHECK(sswv.found_g_);
   CHECK(sswv.found_h_);
 }
-extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobject target) {
+extern "C" JNIEXPORT void JNICALL Java_Main_waitAndInstrumentStack(JNIEnv*,
+                                                                   jobject,
+                                                                   jobject target) {
   while (!instrument_waiting) {
   }
   bool timed_out = false;
@@ -85,7 +87,8 @@ extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobje
   CHECK(!timed_out);
   CHECK(other != nullptr);
   ScopedSuspendAll ssa(__FUNCTION__);
-  Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other);
+  Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other,
+                                                                  /* deopt_all_frames= */ false);
   MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
   bool updated = other->ModifySuspendCount(Thread::Current(), -1, nullptr, SuspendReason::kInternal);
   CHECK(updated);
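
The stack.cc and stack.h hunks above locate a one-byte "should deoptimize" slot just below the callee-save spill area of a quick frame, and ShouldDeoptimizeMethod() checks only its kDebug bit. Below is a minimal, self-contained sketch of that arithmetic and bit test; it is not ART code, and the FrameInfo struct, flag constants, and example sizes are invented stand-ins.

```cpp
// Illustrative sketch only (not ART code): the flag-slot arithmetic used by
// StackVisitor::GetShouldDeoptimizeFlagAddr() and the kDebug bit test done in
// Instrumentation::ShouldDeoptimizeMethod(). Names and example sizes are invented.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr uint8_t kDeoptFlagDebug = 1u << 0;  // requested by the debugger
constexpr uint8_t kDeoptFlagCHA = 1u << 1;    // CHA invalidation, handled in JITed code
constexpr size_t kShouldDeoptimizeFlagSize = sizeof(uint8_t);

struct FrameInfo {
  size_t frame_size_bytes;  // FrameSizeInBytes()
  size_t core_spill_bytes;  // POPCOUNT(CoreSpillMask()) * bytes per GPR spill
  size_t fp_spill_bytes;    // POPCOUNT(FpSpillMask()) * bytes per FPR spill
};

// The flag byte sits just below the callee-save spill area at the top of the quick frame.
uint8_t* ShouldDeoptimizeFlagAddr(uint8_t* sp, const FrameInfo& info) {
  size_t offset = info.frame_size_bytes - info.core_spill_bytes - info.fp_spill_bytes -
                  kShouldDeoptimizeFlagSize;
  return sp + offset;
}

// Mirrors the check in ShouldDeoptimizeMethod(): only a debugger-requested bit
// forces the frame back to the interpreter; the CHA bit alone does not.
bool FrameRequestsDebugDeopt(uint8_t* sp, const FrameInfo& info) {
  return (*ShouldDeoptimizeFlagAddr(sp, info) & kDeoptFlagDebug) != 0;
}

int main() {
  FrameInfo info{/*frame_size_bytes=*/96, /*core_spill_bytes=*/32, /*fp_spill_bytes=*/16};
  std::vector<uint8_t> frame(info.frame_size_bytes, 0);  // stand-in for a real quick frame
  uint8_t* sp = frame.data();

  assert(!FrameRequestsDebugDeopt(sp, info));
  *ShouldDeoptimizeFlagAddr(sp, info) |= kDeoptFlagDebug;  // what SetShouldDeoptimizeFlag does
  assert(FrameRequestsDebugDeopt(sp, info));
  *ShouldDeoptimizeFlagAddr(sp, info) = kDeoptFlagCHA;     // CHA-only: no debug deopt here
  assert(!FrameRequestsDebugDeopt(sp, info));
  return 0;
}
```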
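Likewise, the refactored return-value handling earlier in the patch selects the result register purely from the method's shorty: 'V' carries no value, 'F'/'D' read the floating-point result, and references ('L'/'[') together with all integral kinds read the general-purpose result. A minimal sketch of that selection follows; it is not ART code, and Value is an invented stand-in for art::JValue.

```cpp
// Illustrative sketch only (not ART code): shorty-based return-value selection
// as factored out into Instrumentation::GetReturnValue().
#include <cassert>
#include <cstdint>

union Value {
  int64_t j;  // raw 64-bit payload, like JValue::SetJ()/GetJ()
  double d;
};

Value GetReturnValue(char return_shorty, bool* is_ref, uint64_t gpr_result, uint64_t fpr_result) {
  *is_ref = return_shorty == '[' || return_shorty == 'L';
  Value v;
  if (return_shorty == 'V') {
    v.j = 0;  // void: keep the slot well-defined
  } else if (return_shorty == 'F' || return_shorty == 'D') {
    v.j = static_cast<int64_t>(fpr_result);  // raw bits of the FP return register
  } else {
    v.j = static_cast<int64_t>(gpr_result);  // references and integral types
  }
  return v;
}

int main() {
  bool is_ref = false;
  Value v = GetReturnValue('I', &is_ref, /*gpr_result=*/42, /*fpr_result=*/0);
  assert(!is_ref && v.j == 42);
  GetReturnValue('L', &is_ref, /*gpr_result=*/0, /*fpr_result=*/0);
  assert(is_ref);
  return 0;
}
```

Returning is_ref through an out-parameter lets the caller wrap a reference result in a handle before anything that might suspend, which is what PopInstrumentationStackFrame does with its StackHandleScope above.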