Diffstat (limited to 'runtime')
-rw-r--r--  runtime/arch/arm/quick_entrypoints_arm.S                   |  33
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S               |  37
-rw-r--r--  runtime/arch/x86/quick_entrypoints_x86.S                   |  59
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S             |  37
-rw-r--r--  runtime/cha.cc                                             |  17
-rw-r--r--  runtime/deoptimization_kind.h                              |  11
-rw-r--r--  runtime/entrypoints/quick/quick_default_init_entrypoints.h |   4
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints_list.h         |   2
-rw-r--r--  runtime/entrypoints/quick/quick_trampoline_entrypoints.cc  |  70
-rw-r--r--  runtime/entrypoints/runtime_asm_entrypoints.h              |   2
-rw-r--r--  runtime/entrypoints_order_test.cc                          |   8
-rw-r--r--  runtime/instrumentation.cc                                 | 188
-rw-r--r--  runtime/instrumentation.h                                  |  36
-rw-r--r--  runtime/oat.h                                              |   4
-rw-r--r--  runtime/quick_exception_handler.cc                         |   5
-rw-r--r--  runtime/stack.cc                                           |  15
-rw-r--r--  runtime/stack.h                                            |  15
-rw-r--r--  runtime/trace.cc                                           |  16
18 files changed, 472 insertions, 87 deletions
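
For orientation before the per-file diff: the change wires in two new quick entrypoints. Their C declarations, as added to runtime/entrypoints/runtime_asm_entrypoints.h in this change, are:

    extern "C" void art_quick_method_entry_hook(ArtMethod*, Thread*);
    extern "C" int32_t art_quick_method_exit_hook(Thread*, ArtMethod*, uint64_t*, uint64_t*);

Each architecture gains an assembly stub that sets up a kSaveEverything frame, calls the C++ hooks artMethodEntryHook / artMethodExitHook, and, in the exit hook, delivers a pending exception when the hook returns a non-zero value.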
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f5f127472e..5ef1d3e17a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2525,3 +2525,36 @@ ENTRY art_quick_compile_optimized
     // artCompileOptimized doesn't allow thread suspension.
     blx lr
 END art_quick_compile_optimized
+
+// On entry, method is at the bottom of the stack.
+ENTRY art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME r0
+    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING]   @ pass ArtMethod
+    mov r1, rSELF                              @ pass Thread::Current
+    bl artMethodEntryHook                      @ (ArtMethod*, Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    blx lr
+END art_quick_method_entry_hook
+
+ENTRY art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME r2
+
+    add r3, sp, #8                             @ store fpr_res pointer, in kSaveEverything frame
+    add r2, sp, #136                           @ store gpr_res pointer, in kSaveEverything frame
+    ldr r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING]  @ pass ArtMethod*
+    mov r0, rSELF                              @ pass Thread::Current
+    blx artMethodExitHook                      @ (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    .cfi_remember_state
+    cbnz r0, .Ldo_deliver_instrumentation_exception_exit  @ Deliver exception
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    blx lr
+.Ldo_deliver_instrumentation_exception_exit:
+    .cfi_restore_state
+    .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END art_quick_method_exit_hook
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 022a0e4053..e5dbeda42d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2630,3 +2630,40 @@ ENTRY art_quick_compile_optimized
     // artCompileOptimized doesn't allow thread suspension.
     ret
 END art_quick_compile_optimized
+
+    .extern artMethodEntryHook
+ENTRY art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING]  // pass ArtMethod*
+    mov x1, xSELF                              // pass Thread::Current
+    bl artMethodEntryHook                      // (ArtMethod*, Thread*)
+
+    RESTORE_SAVE_EVERYTHING_FRAME              // Note: will restore xSELF
+    REFRESH_MARKING_REGISTER
+    ret
+END art_quick_method_entry_hook
+
+    .extern artMethodExitHook
+ENTRY art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    add x3, sp, #16                            // floating-point result ptr in kSaveEverything frame
+    add x2, sp, #272                           // integer result ptr in kSaveEverything frame
+    ldr x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING]  // ArtMethod*
+    mov x0, xSELF                              // Thread::Current
+    bl artMethodExitHook                       // (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    .cfi_remember_state
+    cbnz x0, .Ldo_deliver_instrumentation_exception_exit  // Handle exception
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    .cfi_restore_state
+    .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END art_quick_method_exit_hook
+
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index cda98d2921..2f6af4f5de 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2381,3 +2381,62 @@ DEFINE_FUNCTION art_quick_compile_optimized
     RESTORE_SAVE_EVERYTHING_FRAME
     ret
 END_FUNCTION art_quick_compile_optimized
+
+DEFINE_FUNCTION art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME edx
+    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax  // Fetch ArtMethod
+    subl LITERAL(8), %esp
+    CFI_ADJUST_CFA_OFFSET(8)
+
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    pushl %eax                    // Pass Method*.
+    CFI_ADJUST_CFA_OFFSET(4)
+
+    call SYMBOL(artMethodEntryHook)  // (Method*, Thread*)
+
+    addl LITERAL(16), %esp  // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
+
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+END_FUNCTION art_quick_method_entry_hook
+
+DEFINE_FUNCTION art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME ebx
+
+    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %ebx  // Remember ArtMethod*
+    subl LITERAL(8), %esp  // Align stack.
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH_ARG edx           // Save gpr return value. edx and eax need to be together
+                           // which isn't the case in kSaveEverything frame.
+    PUSH_ARG eax
+    movl %esp, %edx        // Get pointer to gpr_result
+    leal 32(%esp), %eax    // Get pointer to fpr_result, in kSaveEverything frame
+    PUSH_ARG eax           // Pass fpr_result
+    PUSH_ARG edx           // Pass gpr_result
+    PUSH_ARG ebx           // Pass ArtMethod*
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current.
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artMethodExitHook)  // (Thread*, ArtMethod*, gpr_result*, fpr_result*)
+
+    // Return result could have been changed if it's a reference.
+    movl 16(%esp), %ecx
+    movl %ecx, (80+32)(%esp)
+    addl LITERAL(32), %esp  // Pop arguments and gpr_result.
+    CFI_ADJUST_CFA_OFFSET(-32)
+
+    cmpl LITERAL(1), %eax  // Check if we returned error.
+    CFI_REMEMBER_STATE
+    je .Ldo_deliver_instrumentation_exception_exit
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    CFI_RESTORE_STATE_AND_DEF_CFA esp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_FUNCTION art_quick_method_exit_hook
+
+
+
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8c21384c62..136198fe55 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2208,3 +2208,40 @@ DEFINE_FUNCTION art_quick_compile_optimized
     RESTORE_SAVE_EVERYTHING_FRAME  // restore frame up to return address
     ret
 END_FUNCTION art_quick_compile_optimized
+
+// On entry, method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi  // pass ArtMethod
+    movq %gs:THREAD_SELF_OFFSET, %rsi            // pass Thread::Current()
+
+    call SYMBOL(artMethodEntryHook)              // (ArtMethod*, Thread*)
+
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+END_FUNCTION art_quick_method_entry_hook
+
+// On entry, method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    leaq 16(%rsp), %rcx                          // floating-point result pointer in kSaveEverything
+                                                 // frame
+    leaq 144(%rsp), %rdx                         // integer result pointer in kSaveEverything frame
+    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi  // ArtMethod
+    movq %gs:THREAD_SELF_OFFSET, %rdi            // Thread::Current
+    call SYMBOL(artMethodExitHook)               // (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    cmpq LITERAL(1), %rax
+    CFI_REMEMBER_STATE
+    je .Ldo_deliver_instrumentation_exception_exit
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_FUNCTION art_quick_method_exit_hook
diff --git a/runtime/cha.cc b/runtime/cha.cc
index c345af8232..392b35cd81 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -219,27 +219,12 @@ class CHAStackVisitor final : public StackVisitor {
     }
     // The compiled code on stack is not valid anymore. Need to deoptimize.
-    SetShouldDeoptimizeFlag();
+    SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kCHA);
     return true;
   }
 
  private:
-  void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
-    QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
-    size_t frame_size = frame_info.FrameSizeInBytes();
-    uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
-    size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
-        GetBytesPerGprSpillLocation(kRuntimeISA);
-    size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
-        GetBytesPerFprSpillLocation(kRuntimeISA);
-    size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
-    uint8_t* should_deoptimize_addr = sp + offset;
-    // Set deoptimization flag to 1.
-    DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
-    *should_deoptimize_addr = 1;
-  }
-
   // Set of method headers for compiled code that should be deoptimized.
   const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h
index 5be6f3dab1..c2e6a6585a 100644
--- a/runtime/deoptimization_kind.h
+++ b/runtime/deoptimization_kind.h
@@ -29,6 +29,7 @@ enum class DeoptimizationKind {
   kLoopNullBCE,
   kBlockBCE,
   kCHA,
+  kDebugging,
   kFullFrame,
   kLast = kFullFrame
 };
@@ -42,6 +43,7 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) {
     case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null";
     case DeoptimizationKind::kBlockBCE: return "block bounds check elimination";
     case DeoptimizationKind::kCHA: return "class hierarchy analysis";
+    case DeoptimizationKind::kDebugging: return "Deopt requested for debug support";
    case DeoptimizationKind::kFullFrame: return "full frame";
   }
   LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind);
@@ -50,6 +52,15 @@ inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) {
 
 std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind);
 
+// We use a DeoptimizationStackSlot to record if a deoptimization is required
+// for methods that are already on the stack. The value in the slot specifies
+// the reason we need to deoptimize.
+enum class DeoptimizeFlagValue : uint8_t {
+  kCHA = 0b01,
+  kDebug = 0b10,
+  kAll = kCHA | kDebug
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_DEOPTIMIZATION_KIND_H_
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 9b1bd26a1d..9f1766d3f2 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -129,6 +129,10 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp
   qpoints->pUpdateInlineCache = art_quick_update_inline_cache;
   qpoints->pCompileOptimized = art_quick_compile_optimized;
 
+  // Tracing hooks
+  qpoints->pMethodEntryHook = art_quick_method_entry_hook;
+  qpoints->pMethodExitHook = art_quick_method_exit_hook;
+
   bool should_report = false;
   PaletteShouldReportJniInvocations(&should_report);
   if (should_report) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 4c451c6687..a77e849d32 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -206,6 +206,8 @@
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
 \
+  V(MethodEntryHook, void, ArtMethod*, Thread*) \
+  V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 0b58c367b8..b29da658cb 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -60,6 +60,9 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self);
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 // Visits the arguments as saved to the stack by a CalleeSaveType::kRefAndArgs callee save frame.
 class QuickArgumentVisitor {
   // Number of bytes for each out register in the caller method's frame.
@@ -2586,4 +2589,71 @@ extern "C" uint64_t artInvokeCustom(uint32_t call_site_idx, Thread* self, ArtMet
   return result.GetJ();
 }
 
+extern "C" void artMethodEntryHook(ArtMethod* method, Thread* self, ArtMethod** sp ATTRIBUTE_UNUSED)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  instr->MethodEnterEvent(self, method);
+  if (instr->IsDeoptimized(method)) {
+    // Instrumentation can request deoptimizing only a particular method (for
+    // example, when there are breakpoints on the method). In such cases
+    // deoptimize only this method. FullFrame deoptimizations are handled on
+    // method exits.
+    artDeoptimizeFromCompiledCode(DeoptimizationKind::kDebugging, self);
+  }
+}
+
+extern "C" int artMethodExitHook(Thread* self,
+                                 ArtMethod* method,
+                                 uint64_t* gpr_result,
+                                 uint64_t* fpr_result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
+  CHECK(gpr_result != nullptr);
+  CHECK(fpr_result != nullptr);
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending())
+      << "Enter instrumentation exit stub with pending exception " << self->GetException()->Dump();
+
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  bool is_ref;
+  JValue return_value = instr->GetReturnValue(self, method, &is_ref, gpr_result, fpr_result);
+  bool deoptimize = false;
+  {
+    StackHandleScope<1> hs(self);
+    MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
+    if (is_ref) {
+      // Take a handle to the return value so we won't lose it if we suspend.
+      res.Assign(return_value.GetL());
+    }
+    DCHECK(!method->IsRuntimeMethod());
+    instr->MethodExitEvent(self,
+                           method,
+                           /* frame= */ {},
+                           return_value);
+
+    // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get
+    // back to an upcall.
+    NthCallerVisitor visitor(self, 1, true);
+    visitor.WalkStack(true);
+    deoptimize = instr->ShouldDeoptimizeMethod(self, visitor);
+
+    if (is_ref) {
+      // Restore the return value if it's a reference since it might have moved.
+      *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
+    }
+  }
+
+  if (self->IsExceptionPending() || self->ObserveAsyncException()) {
+    return 1;
+  }
+
+  if (deoptimize) {
+    DeoptimizationMethodType deopt_method_type = instr->GetDeoptimizationMethodType(method);
+    self->PushDeoptimizationContext(return_value, is_ref, nullptr, false, deopt_method_type);
+    artDeoptimize(self);
+    UNREACHABLE();
+  }
+
+  return 0;
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index d2096ec2f1..c4e62e5b87 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -96,6 +96,8 @@ static inline const void* GetQuickInstrumentationExitPc() {
 extern "C" void* art_quick_string_builder_append(uint32_t format);
 extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*);
+extern "C" void art_quick_method_entry_hook(ArtMethod*, Thread*);
+extern "C" int32_t art_quick_method_exit_hook(Thread*, ArtMethod*, uint64_t*, uint64_t*);
 
 }  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0853caef44..609f0811f4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -402,9 +402,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(
+        QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow)
-            + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) ==
+                sizeof(QuickEntryPoints),
+            QuickEntryPoints_all);
   }
 };
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e664eb1366..87db89927e 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -229,6 +229,34 @@ bool Instrumentation::NeedDebugVersionFor(ArtMethod* method) const
       !method->IsProxyMethod();
 }
 
+bool Instrumentation::CodeNeedsEntryExitStub(const void* code, ArtMethod* method) {
+  // In some tests the runtime isn't set up fully, and hence the entry points
+  // could be nullptr.
+  if (code == nullptr) {
+    return true;
+  }
+
+  // When JITing code for debuggable apps we generate the code to call method
+  // entry / exit hooks when required. Hence there is no need to update the
+  // entry point to the instrumentation entry point for JITed code in
+  // debuggable mode.
+  if (!Runtime::Current()->IsJavaDebuggable()) {
+    return true;
+  }
+
+  // Native functions can have JITed entry points but we don't include support
+  // for calling entry / exit hooks directly from the JITed code for native
+  // functions. So we still have to install entry / exit stubs for such cases.
+  if (method->IsNative()) {
+    return true;
+  }
+
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
+    return false;
+  }
+  return true;
+}
+
 void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
   if (!method->IsInvokable() || method->IsProxyMethod()) {
     // Do not change stubs for these methods.
@@ -272,7 +300,12 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
     if (entry_exit_stubs_installed_) {
       // This needs to be checked first since the instrumentation entrypoint will be able to
       // find the actual JIT compiled code that corresponds to this method.
-      new_quick_code = GetQuickInstrumentationEntryPoint();
+      const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
+      if (CodeNeedsEntryExitStub(code, method)) {
+        new_quick_code = GetQuickInstrumentationEntryPoint();
+      } else {
+        new_quick_code = code;
+      }
     } else if (NeedDebugVersionFor(method)) {
       // It would be great to search the JIT for its implementation here but we cannot due to
       // the locks we hold. Instead just set to the interpreter bridge and that code will search
@@ -290,22 +323,29 @@ void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
 }
 
 // Places the instrumentation exit pc as the return PC for every quick frame. This also allows
-// deoptimization of quick frames to interpreter frames.
+// deoptimization of quick frames to interpreter frames. When force_deopt is
+// true, the frames have to be deoptimized. If the frame has a deoptimization
+// stack slot (all JITed frames), it is set to true to indicate this. For frames
+// that do not have this slot, the force_deopt_id on the InstrumentationStack is
+// used to check if the frame needs to be deoptimized. When force_deopt is false
+// we just instrument the stack for method entry / exit hooks.
 // Since we may already have done this previously, we need to push new instrumentation frame before
 // existing instrumentation frames.
-void InstrumentationInstallStack(Thread* thread, void* arg)
+void InstrumentationInstallStack(Thread* thread, void* arg, bool deopt_all_frames)
     REQUIRES(Locks::mutator_lock_) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
 
   struct InstallStackVisitor final : public StackVisitor {
     InstallStackVisitor(Thread* thread_in,
                         Context* context,
                         uintptr_t instrumentation_exit_pc,
-                        uint64_t force_deopt_id)
+                        uint64_t force_deopt_id,
+                        bool deopt_all_frames)
         : StackVisitor(thread_in, context, kInstrumentationStackWalk),
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
           instrumentation_exit_pc_(instrumentation_exit_pc),
           reached_existing_instrumentation_frames_(false),
-          force_deopt_id_(force_deopt_id) {}
+          force_deopt_id_(force_deopt_id),
+          deopt_all_frames_(deopt_all_frames) {}
 
     bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) {
       ArtMethod* m = GetMethod();
@@ -355,6 +395,15 @@ void InstrumentationInstallStack(Thread* thread, void* arg)
           LOG(INFO) << "Ignoring already instrumented " << frame.Dump();
         }
       } else {
+        // If it is a JITed frame, just set the deopt bit if required;
+        // otherwise continue.
+        const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+        if (method_header != nullptr && method_header->HasShouldDeoptimizeFlag()) {
+          if (deopt_all_frames_) {
+            SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kDebug);
+          }
+          return true;
+        }
         CHECK_NE(return_pc, 0U);
         if (UNLIKELY(reached_existing_instrumentation_frames_ && !m->IsRuntimeMethod())) {
           // We already saw an existing instrumentation frame so this should be a runtime-method
@@ -397,6 +446,7 @@ void InstrumentationInstallStack(Thread* thread, void* arg)
     const uintptr_t instrumentation_exit_pc_;
     bool reached_existing_instrumentation_frames_;
     uint64_t force_deopt_id_;
+    bool deopt_all_frames_;
   };
   if (kVerboseInstrumentation) {
     std::string thread_name;
@@ -407,8 +457,11 @@ void InstrumentationInstallStack(Thread* thread, void* arg)
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
   std::unique_ptr<Context> context(Context::Create());
   uintptr_t instrumentation_exit_pc = reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc());
-  InstallStackVisitor visitor(
-      thread, context.get(), instrumentation_exit_pc, instrumentation->current_force_deopt_id_);
+  InstallStackVisitor visitor(thread,
+                              context.get(),
+                              instrumentation_exit_pc,
+                              instrumentation->current_force_deopt_id_,
+                              deopt_all_frames);
   visitor.WalkStack(true);
 
   if (instrumentation->ShouldNotifyMethodEnterExitEvents()) {
@@ -431,9 +484,9 @@ void InstrumentationInstallStack(Thread* thread, void* arg)
   thread->VerifyStack();
 }
 
-void Instrumentation::InstrumentThreadStack(Thread* thread) {
+void Instrumentation::InstrumentThreadStack(Thread* thread, bool force_deopt) {
   instrumentation_stubs_installed_ = true;
-  InstrumentationInstallStack(thread, this);
+  InstrumentationInstallStack(thread, this, force_deopt);
 }
 
 // Removes the instrumentation exit pc as the return PC for every quick frame.
@@ -529,7 +582,7 @@ void Instrumentation::DeoptimizeAllThreadFrames() {
   ThreadList* tl = Runtime::Current()->GetThreadList();
   tl->ForEach([&](Thread* t) {
     Locks::mutator_lock_->AssertExclusiveHeld(self);
-    InstrumentThreadStack(t);
+    InstrumentThreadStack(t, /* deopt_all_frames= */ true);
   });
   current_force_deopt_id_++;
 }
@@ -781,7 +834,9 @@ void Instrumentation::UpdateStubs() {
     runtime->GetClassLinker()->VisitClasses(&visitor);
     instrumentation_stubs_installed_ = true;
     MutexLock mu(self, *Locks::thread_list_lock_);
-    runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this);
+    for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
+      InstrumentThreadStack(thread, /* deopt_all_frames= */ false);
+    }
   } else {
     interpreter_stubs_installed_ = false;
     entry_exit_stubs_installed_ = false;
@@ -905,7 +960,8 @@ void Instrumentation::UpdateMethodsCodeImpl(ArtMethod* method, const void* quick
       // implementation directly and this will confuse the instrumentation trampolines.
       // TODO We should remove the need for this since it makes it impossible to profile
       // Proxy.<init> correctly in all cases.
-      method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init)) {
+      method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init) &&
+      CodeNeedsEntryExitStub(quick_code, method)) {
     new_quick_code = GetQuickInstrumentationEntryPoint();
   } else {
     new_quick_code = quick_code;
@@ -998,7 +1054,12 @@ void Instrumentation::Deoptimize(ArtMethod* method) {
     // these previously so it will only cover the newly created frames.
     instrumentation_stubs_installed_ = true;
     MutexLock mu(self, *Locks::thread_list_lock_);
-    Runtime::Current()->GetThreadList()->ForEach(InstrumentationInstallStack, this);
+    for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
+      // This isn't a strong deopt. We deopt this method if it is still in the
+      // deopt methods list. If by the time we hit this frame we no longer need
+      // a deopt it is safe to continue. So we don't mark the frame.
+      InstrumentThreadStack(thread, /* deopt_all_frames= */ false);
+    }
   }
 }
 
@@ -1424,28 +1485,8 @@ static char GetRuntimeMethodShorty(Thread* thread) REQUIRES_SHARED(Locks::mutato
   return shorty;
 }
 
-TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
-                                                            uintptr_t* return_pc_addr,
-                                                            uint64_t* gpr_result,
-                                                            uint64_t* fpr_result) {
-  DCHECK(gpr_result != nullptr);
-  DCHECK(fpr_result != nullptr);
-  // Do the pop.
-  std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack =
-      self->GetInstrumentationStack();
-  CHECK_GT(stack->size(), 0U);
-  auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr));
-  CHECK(it != stack->end());
-  InstrumentationStackFrame instrumentation_frame = it->second;
-  stack->erase(it);
-
-  // Set return PC and check the consistency of the stack.
-  // We don't cache the return pc value in a local as it may change after
-  // sending a method exit event.
-  *return_pc_addr = instrumentation_frame.return_pc_;
-  self->VerifyStack();
-
-  ArtMethod* method = instrumentation_frame.method_;
+JValue Instrumentation::GetReturnValue(
+    Thread* self, ArtMethod* method, bool* is_ref, uint64_t* gpr_result, uint64_t* fpr_result) {
   uint32_t length;
   const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty;
@@ -1477,9 +1518,7 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
     return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
   }
 
-  bool is_ref = return_shorty == '[' || return_shorty == 'L';
-  StackHandleScope<1> hs(self);
-  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
+  *is_ref = return_shorty == '[' || return_shorty == 'L';
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
@@ -1488,6 +1527,59 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
   } else {
     return_value.SetJ(*gpr_result);
   }
+  return return_value;
+}
+
+bool Instrumentation::ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor) {
+  bool should_deoptimize_frame = false;
+  const OatQuickMethodHeader* header = visitor.GetCurrentOatQuickMethodHeader();
+  if (header != nullptr && header->HasShouldDeoptimizeFlag()) {
+    uint8_t should_deopt_flag = visitor.GetShouldDeoptimizeFlag();
+    // DeoptimizeFlag could be set for debugging or for CHA invalidations.
+    // Deoptimize here only if it was requested for debugging. CHA
+    // invalidations are handled in the JITed code.
+    if ((should_deopt_flag & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0) {
+      should_deoptimize_frame = true;
+    }
+  }
+  return (visitor.caller != nullptr) &&
+         (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+          self->IsForceInterpreter() ||
+          // NB Since structurally obsolete compiled methods might have the offsets of
+          // methods/fields compiled in we need to go back to interpreter whenever we hit
+          // them.
+          visitor.caller->GetDeclaringClass()->IsObsoleteObject() ||
+          Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller) ||
+          should_deoptimize_frame);
+}
+
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
+                                                            uintptr_t* return_pc_addr,
+                                                            uint64_t* gpr_result,
+                                                            uint64_t* fpr_result) {
+  DCHECK(gpr_result != nullptr);
+  DCHECK(fpr_result != nullptr);
+  // Do the pop.
+  std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack =
+      self->GetInstrumentationStack();
+  CHECK_GT(stack->size(), 0U);
+  auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr));
+  CHECK(it != stack->end());
+  InstrumentationStackFrame instrumentation_frame = it->second;
+  stack->erase(it);
+
+  // Set return PC and check the consistency of the stack.
+  // We don't cache the return pc value in a local as it may change after
+  // sending a method exit event.
+  *return_pc_addr = instrumentation_frame.return_pc_;
+  self->VerifyStack();
+
+  ArtMethod* method = instrumentation_frame.method_;
+
+  bool is_ref;
+  JValue return_value = GetReturnValue(self, method, &is_ref, gpr_result, fpr_result);
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
   if (is_ref) {
     // Take a handle to the return value so we won't lose it if we suspend.
     // FIXME: The `is_ref` is often guessed wrong, so even object alignment
@@ -1504,17 +1596,11 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
   // back to an upcall.
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
-  bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
-                     self->IsForceInterpreter() ||
-                     // NB Since structurally obsolete compiled methods might have the offsets of
-                     // methods/fields compiled in we need to go back to interpreter whenever we hit
-                     // them.
-                     visitor.caller->GetDeclaringClass()->IsObsoleteObject() ||
-                     // Check if we forced all threads to deoptimize in the time between this frame
-                     // being created and now.
-                     instrumentation_frame.force_deopt_id_ != current_force_deopt_id_ ||
-                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
+  // Check if we forced all threads to deoptimize in the time between this frame being created and
+  // now.
+  bool should_deoptimize_frame = instrumentation_frame.force_deopt_id_ != current_force_deopt_id_;
+  bool deoptimize = ShouldDeoptimizeMethod(self, visitor) || should_deoptimize_frame;
+
   if (is_ref) {
     // Restore the return value if it's a reference since it might have moved.
     *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
@@ -1532,8 +1618,8 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
   }
   DeoptimizationMethodType deopt_method_type = GetDeoptimizationMethodType(method);
   self->PushDeoptimizationContext(return_value,
-                                  return_shorty == 'L' || return_shorty == '[',
-                                  /* exception= */ nullptr ,
+                                  is_ref,
+                                  /* exception= */ nullptr,
                                   /* from_code= */ false,
                                   deopt_method_type);
   return GetTwoWordSuccessValue(*return_pc_addr,
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 988627c76e..4f4bb424d4 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,12 +17,13 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include <functional>
 #include <stdint.h>
+
+#include <functional>
 #include <list>
 #include <memory>
-#include <unordered_set>
 #include <optional>
+#include <unordered_set>
 
 #include "arch/instruction_set.h"
 #include "base/enums.h"
@@ -30,6 +31,7 @@
 #include "base/macros.h"
 #include "base/safe_map.h"
 #include "gc_root.h"
+#include "offsets.h"
 
 namespace art {
 namespace mirror {
@@ -41,6 +43,7 @@ class ArtField;
 class ArtMethod;
 template <typename T> class Handle;
 template <typename T> class MutableHandle;
+struct NthCallerVisitor;
 union JValue;
 class SHARED_LOCKABLE ReaderWriterMutex;
 class ShadowFrame;
@@ -203,6 +206,10 @@ class Instrumentation {
 
   Instrumentation();
 
+  static constexpr MemberOffset NeedsEntryExitHooksOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Instrumentation, instrumentation_stubs_installed_));
+  }
+
   // Add a listener to be notified of the masked-together set of instrumentation events. This
   // suspends the runtime to install stubs. You are expected to hold the mutator lock as a proxy
   // for saying you should have suspended all threads (installing stubs while threads are running
@@ -479,6 +486,14 @@ class Instrumentation {
   void ExceptionHandledEvent(Thread* thread, ObjPtr<mirror::Throwable> exception_object) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  JValue GetReturnValue(Thread* self,
+                        ArtMethod* method,
+                        bool* is_ref,
+                        uint64_t* gpr_result,
+                        uint64_t* fpr_result) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Called when an instrumented method is entered. The intended link register (lr) is saved so
  // that returning causes a branch to the method exit stub. Generates method enter events.
   void PushInstrumentationStackFrame(Thread* self,
@@ -524,10 +539,13 @@ class Instrumentation {
       !GetDeoptimizedMethodsLock());
 
   // Install instrumentation exit stub on every method of the stack of the given thread.
-  // This is used by the debugger to cause a deoptimization of the thread's stack after updating
-  // local variable(s).
-  void InstrumentThreadStack(Thread* thread)
-      REQUIRES(Locks::mutator_lock_);
+  // This is used by:
+  //  - the debugger to cause a deoptimization of all frames in the thread's stack (for
+  //    example, after updating local variables)
+  //  - tracing, to call method entry / exit hooks. For this we instrument
+  //    the stack frame to run entry / exit hooks but we don't need to deoptimize.
+  // deopt_all_frames indicates whether the frames need to be deoptimized.
+  void InstrumentThreadStack(Thread* thread, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_);
 
   // Force all currently running frames to be deoptimized back to interpreter. This should only be
   // used in cases where basically all compiled code has been invalidated.
@@ -551,6 +569,10 @@ class Instrumentation {
   // False otherwise.
   bool RequiresInstrumentationInstallation(InstrumentationLevel new_level) const;
 
+  // Returns true if we need an entry / exit stub to call the entry / exit hooks.
+  // JITed code calls the entry / exit hooks directly and doesn't need the stub.
+  bool CodeNeedsEntryExitStub(const void* code, ArtMethod* method);
+
   // Does the job of installing or removing instrumentation code within methods.
   // In order to support multiple clients using instrumentation at the same time,
   // the caller must pass a unique key (a string) identifying it so we remind which
@@ -743,7 +765,7 @@ class Instrumentation {
   friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
   friend class InstrumentationStackPopper;  // For popping instrumentation frames.
 
-  friend void InstrumentationInstallStack(Thread*, void*);
+  friend void InstrumentationInstallStack(Thread*, void*, bool);
 
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
diff --git a/runtime/oat.h b/runtime/oat.h
index 264ad9b116..51b9e2e7ca 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: JNI: Rewrite read barrier slow path.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '8', '\0' } };
+  // Last oat version changed reason: reland new entry points for method entry / exit hooks.
+  static constexpr std::array<uint8_t, 4> kOatVersion{ {'2', '0', '9', '\0'} };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5f497af46a..ac5065b2a6 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -599,7 +599,10 @@ void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
         << GetDeoptimizationKindName(kind);
     DumpFramesWithType(self_, /* details= */ true);
   }
-  if (Runtime::Current()->UseJitCompilation()) {
+  // When deoptimizing for debug support, the optimized code is still valid and
+  // can be reused when debugging support (like breakpoints) is no longer
+  // needed for this method.
+  if (Runtime::Current()->UseJitCompilation() && (kind != DeoptimizationKind::kDebugging)) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
   } else {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 233106eb0a..eb0fe5692d 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -800,6 +800,21 @@ QuickMethodFrameInfo StackVisitor::GetCurrentQuickFrameInfo() const {
   return RuntimeCalleeSaveFrame::GetMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
 }
 
+uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(GetCurrentOatQuickMethodHeader()->HasShouldDeoptimizeFlag());
+  QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+  size_t frame_size = frame_info.FrameSizeInBytes();
+  uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+  size_t core_spill_size =
+      POPCOUNT(frame_info.CoreSpillMask()) * GetBytesPerGprSpillLocation(kRuntimeISA);
+  size_t fpu_spill_size =
+      POPCOUNT(frame_info.FpSpillMask()) * GetBytesPerFprSpillLocation(kRuntimeISA);
+  size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+  uint8_t* should_deoptimize_addr = sp + offset;
+  DCHECK_EQ(*should_deoptimize_addr & ~static_cast<uint8_t>(DeoptimizeFlagValue::kAll), 0);
+  return should_deoptimize_addr;
+}
+
 template <StackVisitor::CountTransitions kCount>
 void StackVisitor::WalkStack(bool include_transitions) {
   if (check_suspended_) {
diff --git a/runtime/stack.h b/runtime/stack.h
index 2a6fdc2b35..1b00b54acb 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -17,12 +17,14 @@
 #ifndef ART_RUNTIME_STACK_H_
 #define ART_RUNTIME_STACK_H_
 
-#include <optional>
 #include <stdint.h>
+
+#include <optional>
 #include <string>
 
 #include "base/locks.h"
 #include "base/macros.h"
+#include "deoptimization_kind.h"
 #include "obj_ptr.h"
 #include "quick/quick_method_frame_info.h"
 #include "stack_map.h"
@@ -295,6 +297,15 @@ class StackVisitor {
 
   QuickMethodFrameInfo GetCurrentQuickFrameInfo() const REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void SetShouldDeoptimizeFlag(DeoptimizeFlagValue value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    uint8_t* should_deoptimize_addr = GetShouldDeoptimizeFlagAddr();
+    *should_deoptimize_addr = *should_deoptimize_addr | static_cast<uint8_t>(value);
+  }
+
+  uint8_t GetShouldDeoptimizeFlag() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return *GetShouldDeoptimizeFlagAddr();
+  }
+
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
   StackVisitor(Thread* thread,
@@ -368,6 +379,8 @@ class StackVisitor {
   mutable std::pair<const OatQuickMethodHeader*, CodeInfo> cur_inline_info_;
   mutable std::pair<uintptr_t, StackMap> cur_stack_map_;
 
+  uint8_t* GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   Context* const context_;
   const bool check_suspended_;
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ca0fe10d59..4b5412f3ad 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -421,10 +421,11 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
                                         "Sampling profiler thread");
     the_trace_->interval_us_ = interval_us;
   } else {
-    runtime->GetInstrumentation()->AddListener(the_trace_,
-                                               instrumentation::Instrumentation::kMethodEntered |
-                                               instrumentation::Instrumentation::kMethodExited |
-                                               instrumentation::Instrumentation::kMethodUnwind);
+    runtime->GetInstrumentation()->AddListener(
+        the_trace_,
+        instrumentation::Instrumentation::kMethodEntered |
+            instrumentation::Instrumentation::kMethodExited |
+            instrumentation::Instrumentation::kMethodUnwind);
     // TODO: In full-PIC mode, we don't need to fully deopt.
     // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case
     // we know that inlining and other problematic optimizations are disabled. We might just
@@ -480,9 +481,10 @@ void Trace::StopTracing(bool finish_tracing, bool flush_file) {
     runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
   } else {
     runtime->GetInstrumentation()->RemoveListener(
-        the_trace, instrumentation::Instrumentation::kMethodEntered |
-                   instrumentation::Instrumentation::kMethodExited |
-                   instrumentation::Instrumentation::kMethodUnwind);
+        the_trace,
+        instrumentation::Instrumentation::kMethodEntered |
+            instrumentation::Instrumentation::kMethodExited |
+            instrumentation::Instrumentation::kMethodUnwind);
     runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
   }
 }
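
Appendix: to make the deopt-flag plumbing above concrete, here is a minimal standalone sketch of the bit-flag scheme. The enum mirrors DeoptimizeFlagValue from runtime/deoptimization_kind.h and the address computation mirrors StackVisitor::GetShouldDeoptimizeFlagAddr() from runtime/stack.cc; the frame sizes in main() are made-up placeholders, not a real ART frame layout.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Mirrors DeoptimizeFlagValue in runtime/deoptimization_kind.h.
    enum class DeoptimizeFlagValue : uint8_t {
      kCHA   = 0b01,  // set on CHA invalidation; consumed by the JITed code itself
      kDebug = 0b10,  // set for debug support; forces a deopt in the method exit hook
      kAll   = kCHA | kDebug
    };

    // Mirrors StackVisitor::GetShouldDeoptimizeFlagAddr(): the flag byte lives
    // just below the core and FP spill areas of a JITed frame.
    uint8_t* GetShouldDeoptimizeFlagAddr(uint8_t* sp,
                                         size_t frame_size,
                                         size_t core_spill_size,
                                         size_t fpu_spill_size) {
      const size_t kShouldDeoptimizeFlagSize = 1;  // one byte, as in ART
      size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
      return sp + offset;
    }

    int main() {
      // Placeholder frame: 64 bytes with 16 bytes of core spills and 8 of FP spills.
      uint8_t frame[64] = {};
      uint8_t* flag = GetShouldDeoptimizeFlagAddr(frame, sizeof(frame), 16, 8);

      // CHA invalidation ORs in kCHA; InstrumentationInstallStack ORs in kDebug.
      *flag |= static_cast<uint8_t>(DeoptimizeFlagValue::kCHA);
      *flag |= static_cast<uint8_t>(DeoptimizeFlagValue::kDebug);

      // ShouldDeoptimizeMethod() deoptimizes only when the kDebug bit is set;
      // the kCHA bit is handled by the JITed code, not by the exit hook.
      bool deopt_for_debug =
          (*flag & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0;
      assert(deopt_for_debug);
      return 0;
    }

The one-byte slot lets independent clients (CHA invalidation and debugger/tracing support) request deoptimization of an already-active frame without clobbering each other, which is why the patch replaces cha.cc's old write of a literal 1 with OR-ing in a named bit.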