diff options
23 files changed, 192 insertions, 86 deletions
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index be9af9871d..1599025697 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -167,7 +167,8 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception; qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; - qpoints->pDeoptimize = art_quick_deoptimize; + // Deoptimization from compiled code. + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; // Read barrier qpoints->pReadBarrierJni = ReadBarrierJni; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index d6396c18d8..e45d828584 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1141,6 +1141,17 @@ ENTRY art_quick_deoptimize END art_quick_deoptimize /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ + .extern artDeoptimizeFromCompiledCode +ENTRY art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1 + mov r0, r9 @ Set up args. + blx artDeoptimizeFromCompiledCode @ artDeoptimizeFromCompiledCode(Thread*) +END art_quick_deoptimize_from_compiled_code + + /* * Signed 64-bit integer multiply. 
* * Consider WXxYZ (r1r0 x r3r2) with a long multiply: diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 0f06727d0d..e9c816f260 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -150,8 +150,8 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception; qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; - // Deoptimize - qpoints->pDeoptimize = art_quick_deoptimize; + // Deoptimization from compiled code. + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; // Read barrier qpoints->pReadBarrierJni = ReadBarrierJni; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index bfef0fa74a..169bc384a8 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1739,6 +1739,18 @@ ENTRY art_quick_deoptimize brk 0 END art_quick_deoptimize + /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ + .extern artDeoptimizeFromCompiledCode +ENTRY art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME + mov x0, xSELF // Pass thread. + bl artDeoptimizeFromCompiledCode // artDeoptimizeFromCompiledCode(Thread*) + brk 0 +END art_quick_deoptimize_from_compiled_code + /* * String's indexOf. 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 4e4b91fdcd..6721e5452f 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -267,8 +267,8 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct."); - // Deoptimize - qpoints->pDeoptimize = art_quick_deoptimize; + // Deoptimization from compiled code. + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; static_assert(!IsDirectEntrypoint(kQuickDeoptimize), "Non-direct C stub marked direct."); // Atomic 64-bit load/store diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index cb49cf5b39..ba58c3fccb 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1542,6 +1542,18 @@ ENTRY art_quick_deoptimize END art_quick_deoptimize /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ + .extern artDeoptimizeFromCompiledCode +ENTRY art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME + jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*) + # Does not return. + move $a0, rSELF # pass Thread::current +END art_quick_deoptimize_from_compiled_code + + /* * Long integer shift. This is different from the generic 32/64-bit * binary operations because vAA/vBB are 64-bit but vCC (the shift * distance) is 32-bit. 
Also, Dalvik requires us to ignore all but the low diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index ec02d5ab69..9f1f0e021c 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -176,8 +176,8 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception; qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; - // Deoptimize - qpoints->pDeoptimize = art_quick_deoptimize; + // Deoptimization from compiled code. + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; // TODO - use lld/scd instructions for Mips64 // Atomic 64-bit load/store diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 4bc049cfbd..1b50b2e246 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -1603,5 +1603,17 @@ ENTRY art_quick_deoptimize move $a0, rSELF # pass Thread::current END art_quick_deoptimize + /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ + .extern artDeoptimizeFromCompiledCode +ENTRY art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME + jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*) + # Does not return. 
+ move $a0, rSELF # pass Thread::current +END art_quick_deoptimize_from_compiled_code + UNIMPLEMENTED art_quick_indexof UNIMPLEMENTED art_quick_string_compareto diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index e2632c103b..10fc281e3d 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -140,7 +140,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; // Deoptimize - qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path; + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; // Read barrier qpoints->pReadBarrierJni = ReadBarrierJni; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 9b2d59d330..029a296e5a 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1684,9 +1684,6 @@ END_FUNCTION art_quick_instrumentation_exit */ DEFINE_FUNCTION art_quick_deoptimize PUSH ebx // Entry point for a jump. Fake that we were called. -.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path) // Entry point for real calls - // from compiled slow paths. -SYMBOL(art_quick_deoptimize_from_compiled_slow_path): SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx subl LITERAL(12), %esp // Align stack. CFI_ADJUST_CFA_OFFSET(12) @@ -1697,6 +1694,20 @@ SYMBOL(art_quick_deoptimize_from_compiled_slow_path): END_FUNCTION art_quick_deoptimize /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ +DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx + subl LITERAL(12), %esp // Align stack. + CFI_ADJUST_CFA_OFFSET(12) + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). 
+ CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + UNREACHABLE +END_FUNCTION art_quick_deoptimize_from_compiled_code + + /* * String's compareTo. * * On entry: diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index ef1bb5f9a7..5cc72e3c4b 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -144,7 +144,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; // Deoptimize - qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path; + qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; // Read barrier qpoints->pReadBarrierJni = ReadBarrierJni; diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 88270d9902..1498a4b7e3 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1728,9 +1728,6 @@ END_FUNCTION art_quick_instrumentation_exit DEFINE_FUNCTION art_quick_deoptimize pushq %rsi // Entry point for a jump. Fake that we were called. // Use hidden arg. -.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path) // Entry point for real calls - // from compiled slow paths. -SYMBOL(art_quick_deoptimize_from_compiled_slow_path): SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // Stack should be aligned now. movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. @@ -1739,6 +1736,18 @@ SYMBOL(art_quick_deoptimize_from_compiled_slow_path): END_FUNCTION art_quick_deoptimize /* + * Compiled code has requested that we deoptimize into the interpreter. The deoptimization + * will long jump to the upcall with a special exception of -1. + */ +DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME + // Stack should be aligned now. 
+ movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. + call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + UNREACHABLE +END_FUNCTION art_quick_deoptimize_from_compiled_code + + /* * String's compareTo. * * On entry: diff --git a/runtime/art_method.cc b/runtime/art_method.cc index 56f7b35501..e46402d882 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -427,9 +427,16 @@ void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* self->ClearException(); ShadowFrame* shadow_frame = self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame); - result->SetJ(self->PopDeoptimizationReturnValue().GetJ()); + mirror::Throwable* pending_exception = nullptr; + self->PopDeoptimizationContext(result, &pending_exception); self->SetTopOfStack(nullptr); self->SetTopOfShadowStack(shadow_frame); + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. + if (pending_exception != nullptr) { + self->SetException(pending_exception); + } interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result); } if (kLogInvocationStartAndReturn) { diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 084c88e239..5c1922eea3 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -89,7 +89,7 @@ ADD_TEST_EQ(THREAD_ID_OFFSET, art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.card_table. 
-#define THREAD_CARD_TABLE_OFFSET 136 +#define THREAD_CARD_TABLE_OFFSET 128 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET, art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value()) diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index a4feac1ea1..d749664d12 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -28,17 +28,30 @@ namespace art { -extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { - ScopedQuickEntrypointChecks sqec(self); - +NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { if (VLOG_IS_ON(deopt)) { LOG(INFO) << "Deopting:"; self->Dump(LOG(INFO)); } - self->PushAndClearDeoptimizationReturnValue(); + self->AssertHasDeoptimizationContext(); self->SetException(Thread::GetDeoptimizationException()); self->QuickDeliverException(); } +extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + artDeoptimizeImpl(self); +} + +extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) + SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + // Before deoptimizing to interpreter, we must push the deoptimization context. + JValue return_value; + return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. 
+ self->PushDeoptimizationContext(return_value, false, self->GetException()); + artDeoptimizeImpl(self); +} + } // namespace art diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc index ad5ee8475e..8e660a246d 100644 --- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc @@ -51,6 +51,9 @@ extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self, ArtM uint64_t gpr_result, uint64_t fpr_result) SHARED_REQUIRES(Locks::mutator_lock_) { + // Instrumentation exit stub must not be entered with a pending exception. + CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception " + << self->GetException()->Dump(); // Compute address of return PC and sanity check that it currently holds 0. size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly); uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) + diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index aa35ec1ca2..0c7caf38b9 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -688,8 +688,12 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, // Request a stack deoptimization if needed ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { + // Push the context of the deoptimization stack so we can restore the return value and the + // exception before executing the deoptimized frames. + self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); + + // Set special exception to cause deoptimization. 
self->SetException(Thread::GetDeoptimizationException()); - self->SetDeoptimizationReturnValue(result, shorty[0] == 'L'); } // No need to restore the args since the method has already been run by the interpreter. diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index 8209dc808e..2842c5a5a6 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -70,7 +70,8 @@ static inline const void* GetQuickInstrumentationEntryPoint() { return reinterpret_cast<const void*>(art_quick_instrumentation_entry); } -extern "C" void art_quick_deoptimize_from_compiled_slow_path(); +// Stub to deoptimize from compiled code. +extern "C" void art_quick_deoptimize_from_compiled_code(); // The return_pc of instrumentation exit stub. extern "C" void art_quick_instrumentation_exit(); diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index f7a3cd53cd..7db8888c7f 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -72,15 +72,12 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4); EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4); EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count, handling_signal_, 4); - EXPECT_OFFSET_DIFFP(Thread, tls32_, handling_signal_, - deoptimization_return_value_is_reference, 4); // TODO: Better connection. Take alignment into account. EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4, thread_tls32_to_tls64); - EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, deoptimization_return_value, 8); - EXPECT_OFFSET_DIFFP(Thread, tls64_, deoptimization_return_value, stats, 8); + EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, stats, 8); // TODO: Better connection. Take alignment into account. 
EXPECT_OFFSET_DIFF_GT3(Thread, tls64_.stats, tlsPtr_.card_table, 8, thread_tls64_to_tlsptr); @@ -108,8 +105,8 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, stacked_shadow_frame_record, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stacked_shadow_frame_record, - deoptimization_return_value_stack, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_return_value_stack, name, sizeof(void*)); + deoptimization_context_stack, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_context_stack, name, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause, sizeof(void*)); diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index e28d578121..63c02ed686 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -1016,7 +1016,8 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintpt PrettyMethod(method).c_str(), return_value.GetJ()) << *self; } - self->SetDeoptimizationReturnValue(return_value, return_shorty == 'L'); + self->PushDeoptimizationContext(return_value, return_shorty == 'L', + nullptr /* no pending exception */); return GetTwoWordSuccessValue(*return_pc, reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint())); } else { diff --git a/runtime/oat.h b/runtime/oat.h index 29dd76ce5e..1520a9bb02 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '6', '9', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git 
a/runtime/thread.cc b/runtime/thread.cc index a33e150b93..63534b131b 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -162,27 +162,41 @@ void Thread::ResetQuickAllocEntryPointsForThread() { ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints); } -class DeoptimizationReturnValueRecord { +class DeoptimizationContextRecord { public: - DeoptimizationReturnValueRecord(const JValue& ret_val, - bool is_reference, - DeoptimizationReturnValueRecord* link) - : ret_val_(ret_val), is_reference_(is_reference), link_(link) {} + DeoptimizationContextRecord(const JValue& ret_val, bool is_reference, + mirror::Throwable* pending_exception, + DeoptimizationContextRecord* link) + : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception), + link_(link) {} JValue GetReturnValue() const { return ret_val_; } bool IsReference() const { return is_reference_; } - DeoptimizationReturnValueRecord* GetLink() const { return link_; } - mirror::Object** GetGCRoot() { + mirror::Throwable* GetPendingException() const { return pending_exception_; } + DeoptimizationContextRecord* GetLink() const { return link_; } + mirror::Object** GetReturnValueAsGCRoot() { DCHECK(is_reference_); return ret_val_.GetGCRoot(); } + mirror::Object** GetPendingExceptionAsGCRoot() { + return reinterpret_cast<mirror::Object**>(&pending_exception_); + } private: + // The value returned by the method at the top of the stack before deoptimization. JValue ret_val_; + + // Indicates whether the returned value is a reference. If so, the GC will visit it. const bool is_reference_; - DeoptimizationReturnValueRecord* const link_; - DISALLOW_COPY_AND_ASSIGN(DeoptimizationReturnValueRecord); + // The exception that was pending before deoptimization (or null if there was no pending + // exception). + mirror::Throwable* pending_exception_; + + // A link to the previous DeoptimizationContextRecord. 
+ DeoptimizationContextRecord* const link_; + + DISALLOW_COPY_AND_ASSIGN(DeoptimizationContextRecord); }; class StackedShadowFrameRecord { @@ -206,22 +220,28 @@ class StackedShadowFrameRecord { DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord); }; -void Thread::PushAndClearDeoptimizationReturnValue() { - DeoptimizationReturnValueRecord* record = new DeoptimizationReturnValueRecord( - tls64_.deoptimization_return_value, - tls32_.deoptimization_return_value_is_reference, - tlsPtr_.deoptimization_return_value_stack); - tlsPtr_.deoptimization_return_value_stack = record; - ClearDeoptimizationReturnValue(); +void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference, + mirror::Throwable* exception) { + DeoptimizationContextRecord* record = new DeoptimizationContextRecord( + return_value, + is_reference, + exception, + tlsPtr_.deoptimization_context_stack); + tlsPtr_.deoptimization_context_stack = record; +} + +void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) { + AssertHasDeoptimizationContext(); + DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack; + tlsPtr_.deoptimization_context_stack = record->GetLink(); + result->SetJ(record->GetReturnValue().GetJ()); + *exception = record->GetPendingException(); + delete record; } -JValue Thread::PopDeoptimizationReturnValue() { - DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack; - DCHECK(record != nullptr); - tlsPtr_.deoptimization_return_value_stack = record->GetLink(); - JValue ret_val(record->GetReturnValue()); - delete record; - return ret_val; +void Thread::AssertHasDeoptimizationContext() { + CHECK(tlsPtr_.deoptimization_context_stack != nullptr) + << "No deoptimization context for thread " << *this; } void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type) { @@ -1575,6 +1595,9 @@ Thread::~Thread() { CHECK(tlsPtr_.flip_function == nullptr); 
CHECK_EQ(tls32_.suspended_at_suspend_check, false); + // Make sure we processed all deoptimization requests. + CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization"; + // We may be deleting a still born thread. SetStateUnsafe(kTerminated); @@ -2593,7 +2616,7 @@ void Thread::VisitRoots(RootVisitor* visitor) { visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id)); if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) { visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception), - RootInfo(kRootNativeStack, thread_id)); + RootInfo(kRootNativeStack, thread_id)); } visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id)); tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id)); @@ -2602,6 +2625,7 @@ void Thread::VisitRoots(RootVisitor* visitor) { if (tlsPtr_.debug_invoke_req != nullptr) { tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id)); } + // Visit roots for deoptimization. 
if (tlsPtr_.stacked_shadow_frame_record != nullptr) { RootCallbackVisitor visitor_to_callback(visitor, thread_id); ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback); @@ -2615,14 +2639,16 @@ void Thread::VisitRoots(RootVisitor* visitor) { } } } - if (tlsPtr_.deoptimization_return_value_stack != nullptr) { - for (DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack; + if (tlsPtr_.deoptimization_context_stack != nullptr) { + for (DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack; record != nullptr; record = record->GetLink()) { if (record->IsReference()) { - visitor->VisitRootIfNonNull(record->GetGCRoot(), - RootInfo(kRootThreadObject, thread_id)); + visitor->VisitRootIfNonNull(record->GetReturnValueAsGCRoot(), + RootInfo(kRootThreadObject, thread_id)); } + visitor->VisitRootIfNonNull(record->GetPendingExceptionAsGCRoot(), + RootInfo(kRootThreadObject, thread_id)); } } for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) { diff --git a/runtime/thread.h b/runtime/thread.h index 9bb57bfb6b..2d450f5f5d 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -77,7 +77,7 @@ class ClassLinker; class Closure; class Context; struct DebugInvokeReq; -class DeoptimizationReturnValueRecord; +class DeoptimizationContextRecord; class DexFile; class JavaVMExt; struct JNIEnvExt; @@ -830,19 +830,13 @@ class Thread { // and execute Java code, so there might be nested deoptimizations happening. // We need to save the ongoing deoptimization shadow frames and return // values on stacks. 
- void SetDeoptimizationReturnValue(const JValue& ret_val, bool is_reference) { - tls64_.deoptimization_return_value.SetJ(ret_val.GetJ()); - tls32_.deoptimization_return_value_is_reference = is_reference; - } - bool IsDeoptimizationReturnValueReference() { - return tls32_.deoptimization_return_value_is_reference; - } - void ClearDeoptimizationReturnValue() { - tls64_.deoptimization_return_value.SetJ(0); - tls32_.deoptimization_return_value_is_reference = false; - } - void PushAndClearDeoptimizationReturnValue(); - JValue PopDeoptimizationReturnValue(); + void PushDeoptimizationContext(const JValue& return_value, bool is_reference, + mirror::Throwable* exception) + SHARED_REQUIRES(Locks::mutator_lock_); + void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) + SHARED_REQUIRES(Locks::mutator_lock_); + void AssertHasDeoptimizationContext() + SHARED_REQUIRES(Locks::mutator_lock_); void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); @@ -1102,9 +1096,8 @@ class Thread { suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0), daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0), thread_exit_check_count(0), handling_signal_(false), - deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false), - ready_for_debug_invoke(false), debug_method_entry_(false), is_gc_marking(false), - weak_ref_access_enabled(true) { + suspended_at_suspend_check(false), ready_for_debug_invoke(false), + debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) { } union StateAndFlags state_and_flags; @@ -1144,10 +1137,6 @@ class Thread { // True if signal is being handled by this thread. bool32_t handling_signal_; - // True if the return value for interpreter after deoptimization is a reference. - // For gc purpose. 
- bool32_t deoptimization_return_value_is_reference; - // True if the thread is suspended in FullSuspendCheck(). This is // used to distinguish runnable threads that are suspended due to // a normal suspend check from other threads. @@ -1178,15 +1167,12 @@ class Thread { } tls32_; struct PACKED(8) tls_64bit_sized_values { - tls_64bit_sized_values() : trace_clock_base(0), deoptimization_return_value() { + tls_64bit_sized_values() : trace_clock_base(0) { } // The clock base used for tracing. uint64_t trace_clock_base; - // Return value used by deoptimization. - JValue deoptimization_return_value; - RuntimeStats stats; } tls64_; @@ -1197,7 +1183,7 @@ class Thread { stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr), top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr), instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr), - stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr), + stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr), name(nullptr), pthread_self(0), last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0), @@ -1282,7 +1268,7 @@ class Thread { StackedShadowFrameRecord* stacked_shadow_frame_record; // Deoptimization return value record stack. - DeoptimizationReturnValueRecord* deoptimization_return_value_stack; + DeoptimizationContextRecord* deoptimization_context_stack; // A cached copy of the java.lang.Thread's name. std::string* name; |