Fix deoptimization with pending exception

When deoptimizing the stack, we set a fake exception on the current
Thread* (see Thread::GetDeoptimizationException). On the next
exception check, the QuickExceptionHandler deoptimizes the stack.

The issue arises when we deoptimize while an exception is already
pending in the current Thread*: setting the fake exception clobbers
the pending exception, which is incorrect. This happens in
artQuickToInterpreterBridge when we return from the interpreter and
want to deoptimize the stack for debugging (for instance when
single-stepping).

This CL saves the pending exception before requesting deoptimization.
The exception is then restored just before the deoptimized frames are
executed by the interpreter.

This CL also cleans up the way we save the deoptimization context
(return value and pending exception).

Bug: 23371176
Bug: 19944235
Change-Id: I7f4c8347b328817c452beda3399e210eba3a88a4
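
For illustration only, not part of the patch: below is a minimal,
self-contained C++ sketch of the save/restore pattern this change
introduces. Thread, JValue, Throwable and kDeoptimizationException are
simplified stand-ins, not the real ART types; PushDeoptimizationContext
and PopDeoptimizationContext mirror the methods added in thread.cc.

// Minimal model of the deoptimization-context save/restore pattern.
// All types here are simplified stand-ins, not ART's real classes.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Throwable { const char* msg; };  // stand-in for mirror::Throwable
struct JValue { int64_t j; };           // stand-in for art::JValue

// One record per pending deoptimization, linked like the records in thread.cc.
struct DeoptimizationContextRecord {
  JValue ret_val;
  bool is_reference;
  Throwable* pending_exception;  // exception pending before deoptimization, may be null
  DeoptimizationContextRecord* link;
};

struct Thread {
  Throwable* exception = nullptr;  // currently pending exception
  DeoptimizationContextRecord* deoptimization_context_stack = nullptr;

  // Save the return value and whatever exception is pending right now.
  void PushDeoptimizationContext(const JValue& ret, bool is_ref, Throwable* pending) {
    deoptimization_context_stack = new DeoptimizationContextRecord{
        ret, is_ref, pending, deoptimization_context_stack};
  }

  // Restore the saved return value and hand back the saved exception.
  void PopDeoptimizationContext(JValue* result, Throwable** pending) {
    DeoptimizationContextRecord* record = deoptimization_context_stack;
    assert(record != nullptr);  // AssertHasDeoptimizationContext() in the real code
    deoptimization_context_stack = record->link;
    *result = record->ret_val;
    *pending = record->pending_exception;
    delete record;
  }
};

// Sentinel exception used to trigger deoptimization, playing the role of
// Thread::GetDeoptimizationException().
Throwable kDeoptimizationException{"deoptimization sentinel"};

int main() {
  Thread self;
  Throwable real{"real pending exception"};
  self.exception = &real;

  // Before this CL the sentinel clobbered the real exception. Now the real
  // exception is saved in the context record before the sentinel is installed.
  self.PushDeoptimizationContext(JValue{42}, /*is_ref=*/false, self.exception);
  self.exception = &kDeoptimizationException;

  // Later, just before interpreting the deoptimized frames:
  JValue result{};
  Throwable* saved = nullptr;
  self.PopDeoptimizationContext(&result, &saved);
  if (saved != nullptr) {
    self.exception = saved;  // restore the original pending exception
  }
  std::printf("return=%lld exception=%s\n",
              static_cast<long long>(result.j), self.exception->msg);
  return 0;
}

The actual change wires this pattern into artQuickToInterpreterBridge
(push before installing the fake exception) and art_method.cc (pop and
restore before entering the interpreter), and makes the GC visit the
saved exception as a root.
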
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f4c6473..6d0d547 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -433,9 +433,16 @@
self->ClearException();
ShadowFrame* shadow_frame =
self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
- result->SetJ(self->PopDeoptimizationReturnValue().GetJ());
+ mirror::Throwable* pending_exception = nullptr;
+ self->PopDeoptimizationContext(result, &pending_exception);
self->SetTopOfStack(nullptr);
self->SetTopOfShadowStack(shadow_frame);
+
+ // Restore the exception that was pending before deoptimization then interpret the
+ // deoptimized frames.
+ if (pending_exception != nullptr) {
+ self->SetException(pending_exception);
+ }
interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
}
if (kLogInvocationStartAndReturn) {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 084c88e..5c1922e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 136
+#define THREAD_CARD_TABLE_OFFSET 128
ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index a4feac1..0f77647 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -36,7 +36,7 @@
self->Dump(LOG(INFO));
}
- self->PushAndClearDeoptimizationReturnValue();
+ self->AssertHasDeoptimizationContext();
self->SetException(Thread::GetDeoptimizationException());
self->QuickDeliverException();
}
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index ad5ee84..8e660a2 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -51,6 +51,9 @@
uint64_t gpr_result,
uint64_t fpr_result)
SHARED_REQUIRES(Locks::mutator_lock_) {
+ // Instrumentation exit stub must not be entered with a pending exception.
+ CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+ << self->GetException()->Dump();
// Compute address of return PC and sanity check that it currently holds 0.
size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index da4b82c..74270de 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -675,8 +675,12 @@
// Request a stack deoptimization if needed
ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+ // Push the context of the deoptimization stack so we can restore the return value and the
+ // exception before executing the deoptimized frames.
+ self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+ // Set special exception to cause deoptimization.
self->SetException(Thread::GetDeoptimizationException());
- self->SetDeoptimizationReturnValue(result, shorty[0] == 'L');
}
// No need to restore the args since the method has already been run by the interpreter.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f7a3cd5..7db8888 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -72,15 +72,12 @@
EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4);
EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4);
EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count, handling_signal_, 4);
- EXPECT_OFFSET_DIFFP(Thread, tls32_, handling_signal_,
- deoptimization_return_value_is_reference, 4);
// TODO: Better connection. Take alignment into account.
EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4,
thread_tls32_to_tls64);
- EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, deoptimization_return_value, 8);
- EXPECT_OFFSET_DIFFP(Thread, tls64_, deoptimization_return_value, stats, 8);
+ EXPECT_OFFSET_DIFFP(Thread, tls64_, trace_clock_base, stats, 8);
// TODO: Better connection. Take alignment into account.
EXPECT_OFFSET_DIFF_GT3(Thread, tls64_.stats, tlsPtr_.card_table, 8, thread_tls64_to_tlsptr);
@@ -108,8 +105,8 @@
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, stacked_shadow_frame_record,
sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stacked_shadow_frame_record,
- deoptimization_return_value_stack, sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_return_value_stack, name, sizeof(void*));
+ deoptimization_context_stack, sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_context_stack, name, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
sizeof(void*));
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e28d578..63c02ed 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1016,7 +1016,8 @@
PrettyMethod(method).c_str(),
return_value.GetJ()) << *self;
}
- self->SetDeoptimizationReturnValue(return_value, return_shorty == 'L');
+ self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+ nullptr /* no pending exception */);
return GetTwoWordSuccessValue(*return_pc,
reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
} else {
diff --git a/runtime/oat.h b/runtime/oat.h
index 29dd76c..1520a9b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '6', '9', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a33e150..63534b1 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -162,27 +162,41 @@
ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
}
-class DeoptimizationReturnValueRecord {
+class DeoptimizationContextRecord {
public:
- DeoptimizationReturnValueRecord(const JValue& ret_val,
- bool is_reference,
- DeoptimizationReturnValueRecord* link)
- : ret_val_(ret_val), is_reference_(is_reference), link_(link) {}
+ DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+ mirror::Throwable* pending_exception,
+ DeoptimizationContextRecord* link)
+ : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+ link_(link) {}
JValue GetReturnValue() const { return ret_val_; }
bool IsReference() const { return is_reference_; }
- DeoptimizationReturnValueRecord* GetLink() const { return link_; }
- mirror::Object** GetGCRoot() {
+ mirror::Throwable* GetPendingException() const { return pending_exception_; }
+ DeoptimizationContextRecord* GetLink() const { return link_; }
+ mirror::Object** GetReturnValueAsGCRoot() {
DCHECK(is_reference_);
return ret_val_.GetGCRoot();
}
+ mirror::Object** GetPendingExceptionAsGCRoot() {
+ return reinterpret_cast<mirror::Object**>(&pending_exception_);
+ }
private:
+ // The value returned by the method at the top of the stack before deoptimization.
JValue ret_val_;
- const bool is_reference_;
- DeoptimizationReturnValueRecord* const link_;
- DISALLOW_COPY_AND_ASSIGN(DeoptimizationReturnValueRecord);
+ // Indicates whether the returned value is a reference. If so, the GC will visit it.
+ const bool is_reference_;
+
+ // The exception that was pending before deoptimization (or null if there was no pending
+ // exception).
+ mirror::Throwable* pending_exception_;
+
+ // A link to the previous DeoptimizationContextRecord.
+ DeoptimizationContextRecord* const link_;
+
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationContextRecord);
};
class StackedShadowFrameRecord {
@@ -206,22 +220,28 @@
DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
};
-void Thread::PushAndClearDeoptimizationReturnValue() {
- DeoptimizationReturnValueRecord* record = new DeoptimizationReturnValueRecord(
- tls64_.deoptimization_return_value,
- tls32_.deoptimization_return_value_is_reference,
- tlsPtr_.deoptimization_return_value_stack);
- tlsPtr_.deoptimization_return_value_stack = record;
- ClearDeoptimizationReturnValue();
+void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+ mirror::Throwable* exception) {
+ DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
+ return_value,
+ is_reference,
+ exception,
+ tlsPtr_.deoptimization_context_stack);
+ tlsPtr_.deoptimization_context_stack = record;
}
-JValue Thread::PopDeoptimizationReturnValue() {
- DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
- DCHECK(record != nullptr);
- tlsPtr_.deoptimization_return_value_stack = record->GetLink();
- JValue ret_val(record->GetReturnValue());
+void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+ AssertHasDeoptimizationContext();
+ DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
+ tlsPtr_.deoptimization_context_stack = record->GetLink();
+ result->SetJ(record->GetReturnValue().GetJ());
+ *exception = record->GetPendingException();
delete record;
- return ret_val;
+}
+
+void Thread::AssertHasDeoptimizationContext() {
+ CHECK(tlsPtr_.deoptimization_context_stack != nullptr)
+ << "No deoptimization context for thread " << *this;
}
void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type) {
@@ -1575,6 +1595,9 @@
CHECK(tlsPtr_.flip_function == nullptr);
CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+ // Make sure we processed all deoptimization requests.
+ CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
+
// We may be deleting a still born thread.
SetStateUnsafe(kTerminated);
@@ -2593,7 +2616,7 @@
visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) {
visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
- RootInfo(kRootNativeStack, thread_id));
+ RootInfo(kRootNativeStack, thread_id));
}
visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
@@ -2602,6 +2625,7 @@
if (tlsPtr_.debug_invoke_req != nullptr) {
tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
}
+ // Visit roots for deoptimization.
if (tlsPtr_.stacked_shadow_frame_record != nullptr) {
RootCallbackVisitor visitor_to_callback(visitor, thread_id);
ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
@@ -2615,14 +2639,16 @@
}
}
}
- if (tlsPtr_.deoptimization_return_value_stack != nullptr) {
- for (DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack;
+ if (tlsPtr_.deoptimization_context_stack != nullptr) {
+ for (DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
record != nullptr;
record = record->GetLink()) {
if (record->IsReference()) {
- visitor->VisitRootIfNonNull(record->GetGCRoot(),
- RootInfo(kRootThreadObject, thread_id));
+ visitor->VisitRootIfNonNull(record->GetReturnValueAsGCRoot(),
+ RootInfo(kRootThreadObject, thread_id));
}
+ visitor->VisitRootIfNonNull(record->GetPendingExceptionAsGCRoot(),
+ RootInfo(kRootThreadObject, thread_id));
}
}
for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 959af95..f7eb363 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -77,7 +77,7 @@
class Closure;
class Context;
struct DebugInvokeReq;
-class DeoptimizationReturnValueRecord;
+class DeoptimizationContextRecord;
class DexFile;
class JavaVMExt;
struct JNIEnvExt;
@@ -830,19 +830,13 @@
// and execute Java code, so there might be nested deoptimizations happening.
// We need to save the ongoing deoptimization shadow frames and return
// values on stacks.
- void SetDeoptimizationReturnValue(const JValue& ret_val, bool is_reference) {
- tls64_.deoptimization_return_value.SetJ(ret_val.GetJ());
- tls32_.deoptimization_return_value_is_reference = is_reference;
- }
- bool IsDeoptimizationReturnValueReference() {
- return tls32_.deoptimization_return_value_is_reference;
- }
- void ClearDeoptimizationReturnValue() {
- tls64_.deoptimization_return_value.SetJ(0);
- tls32_.deoptimization_return_value_is_reference = false;
- }
- void PushAndClearDeoptimizationReturnValue();
- JValue PopDeoptimizationReturnValue();
+ void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+ mirror::Throwable* exception)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+ void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+ void AssertHasDeoptimizationContext()
+ SHARED_REQUIRES(Locks::mutator_lock_);
void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
@@ -1102,9 +1096,8 @@
suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
thread_exit_check_count(0), handling_signal_(false),
- deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false),
- ready_for_debug_invoke(false), debug_method_entry_(false), is_gc_marking(false),
- weak_ref_access_enabled(true) {
+ suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+ debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
}
union StateAndFlags state_and_flags;
@@ -1144,10 +1137,6 @@
// True if signal is being handled by this thread.
bool32_t handling_signal_;
- // True if the return value for interpreter after deoptimization is a reference.
- // For gc purpose.
- bool32_t deoptimization_return_value_is_reference;
-
// True if the thread is suspended in FullSuspendCheck(). This is
// used to distinguish runnable threads that are suspended due to
// a normal suspend check from other threads.
@@ -1178,15 +1167,12 @@
} tls32_;
struct PACKED(8) tls_64bit_sized_values {
- tls_64bit_sized_values() : trace_clock_base(0), deoptimization_return_value() {
+ tls_64bit_sized_values() : trace_clock_base(0) {
}
// The clock base used for tracing.
uint64_t trace_clock_base;
- // Return value used by deoptimization.
- JValue deoptimization_return_value;
-
RuntimeStats stats;
} tls64_;
@@ -1197,7 +1183,7 @@
stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
- stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
+ stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
name(nullptr), pthread_self(0),
last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
@@ -1281,7 +1267,7 @@
StackedShadowFrameRecord* stacked_shadow_frame_record;
// Deoptimization return value record stack.
- DeoptimizationReturnValueRecord* deoptimization_return_value_stack;
+ DeoptimizationContextRecord* deoptimization_context_stack;
// A cached copy of the java.lang.Thread's name.
std::string* name;