ART: Single-frame deopt
Add deoptimization of a single frame. This works by removing the managed
code frame and jumping into the quick-to-interpreter bridge, which now
understands a stored ShadowFrame.
A separate architecture-dependent fixup pass is needed. On x86 and x86-64,
the return address is left on the stack and reused, so we don't need to push it there.
Bug: 21611912
Change-Id: I06625685ced8b054244f8685ab50b238a705b9d2
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 403d348..8f6b1ff 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[PC] = &pc_;
+ gprs_[R0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = ArmContext::kBadGprBase + SP;
pc_ = ArmContext::kBadGprBase + PC;
+ arg0_ = 0;
}
void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 77bb5c8..ea31055 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -45,6 +45,10 @@
SetGPR(PC, new_pc);
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(R0, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
return gprs_[reg] != nullptr;
@@ -84,7 +88,7 @@
uintptr_t* gprs_[kNumberOfCoreRegisters];
uint32_t* fprs_[kNumberOfSRegisters];
// Hold values for sp and pc if they are not located within a stack frame.
- uintptr_t sp_, pc_;
+ uintptr_t sp_, pc_, arg0_;
};
} // namespace arm
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e45d828..dc1cf8a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -437,8 +437,8 @@
ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14)
add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3
ldm r0, {r3-r13} @ load remaining gprs from argument gprs_
- mov r0, #0 @ clear result registers r0 and r1
- mov r1, #0
+ ldr r0, [r0, #-12] @ load r0 value
+ mov r1, #0 @ clear result register r1
bx r2 @ do long jump
END art_quick_do_long_jump
@@ -1142,7 +1142,7 @@
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 60becc6..4477631 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -31,10 +31,12 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
- gprs_[LR] = &pc_;
+ gprs_[kPC] = &pc_;
+ gprs_[X0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = Arm64Context::kBadGprBase + SP;
- pc_ = Arm64Context::kBadGprBase + LR;
+ pc_ = Arm64Context::kBadGprBase + kPC;
+ arg0_ = 0;
}
void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
@@ -58,8 +60,8 @@
}
void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
- DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+ DCHECK_LT(reg, arraysize(gprs_));
+ // Note: we use kPC == XZR, so do not ensure that reg != XZR.
DCHECK(IsAccessibleGPR(reg));
DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset.
*gprs_[reg] = value;
@@ -124,13 +126,13 @@
extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*);
void Arm64Context::DoLongJump() {
- uint64_t gprs[kNumberOfXRegisters];
+ uint64_t gprs[arraysize(gprs_)];
uint64_t fprs[kNumberOfDRegisters];
// The long jump routine called below expects to find the value for SP at index 31.
DCHECK_EQ(SP, 31);
- for (size_t i = 0; i < kNumberOfXRegisters; ++i) {
+ for (size_t i = 0; i < arraysize(gprs_); ++i) {
gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
}
for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 1c99f3c..11314e0 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -42,20 +42,25 @@
}
void SetPC(uintptr_t new_lr) OVERRIDE {
- SetGPR(LR, new_lr);
+ SetGPR(kPC, new_lr);
+ }
+
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(X0, new_arg0_value);
}
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+ DCHECK_LT(reg, arraysize(gprs_));
return gprs_[reg] != nullptr;
}
uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+ DCHECK_LT(reg, arraysize(gprs_));
return gprs_[reg];
}
uintptr_t GetGPR(uint32_t reg) OVERRIDE {
+ // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value.
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
DCHECK(IsAccessibleGPR(reg));
return *gprs_[reg];
@@ -79,12 +84,15 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ static constexpr size_t kPC = kNumberOfXRegisters;
+
private:
- // Pointers to register locations, initialized to null or the specific registers below.
- uintptr_t* gprs_[kNumberOfXRegisters];
+ // Pointers to register locations, initialized to null or the specific registers below. We need
+ // an additional one for the PC.
+ uintptr_t* gprs_[kNumberOfXRegisters + 1];
uint64_t * fprs_[kNumberOfDRegisters];
- // Hold values for sp and pc if they are not located within a stack frame.
- uintptr_t sp_, pc_;
+ // Hold values for sp, pc and arg0 if they are not located within a stack frame.
+ uintptr_t sp_, pc_, arg0_;
};
} // namespace arm64
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 169bc38..6812178 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -941,7 +941,7 @@
// Load GPRs
// TODO: lots of those are smashed, could optimize.
add x0, x0, #30*8
- ldp x30, x1, [x0], #-16
+ ldp x30, x1, [x0], #-16 // LR & SP
ldp x28, x29, [x0], #-16
ldp x26, x27, [x0], #-16
ldp x24, x25, [x0], #-16
@@ -958,10 +958,12 @@
ldp x2, x3, [x0], #-16
mov sp, x1
- // TODO: Is it really OK to use LR for the target PC?
- mov x0, #0
- mov x1, #0
- br xLR
+ // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
+ ldr x1, [x0, #33*8]
+ // And the value of x0.
+ ldr x0, [x0]
+
+ br x1
END art_quick_do_long_jump
/*
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9ef761e..9af7c04 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -50,6 +50,9 @@
// Sets the program counter value.
virtual void SetPC(uintptr_t new_pc) = 0;
+ // Sets the first argument register.
+ virtual void SetArg0(uintptr_t new_arg0_value) = 0;
+
// Returns whether the given GPR is accessible (read or write).
virtual bool IsAccessibleGPR(uint32_t reg) = 0;
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index bc2bf68..08ab356 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[RA] = &ra_;
+ gprs_[A0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = MipsContext::kBadGprBase + SP;
ra_ = MipsContext::kBadGprBase + RA;
+ arg0_ = 0;
}
void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 38cf29a..0affe53 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -78,12 +78,17 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(A0, new_arg0_value);
+ }
+
private:
// Pointers to registers in the stack, initialized to null except for the special cases below.
uintptr_t* gprs_[kNumberOfCoreRegisters];
uint32_t* fprs_[kNumberOfFRegisters];
- // Hold values for sp and ra (return address) if they are not located within a stack frame.
- uintptr_t sp_, ra_;
+ // Hold values for sp and ra (return address) if they are not located within a stack frame, as
+ // well as the first argument.
+ uintptr_t sp_, ra_, arg0_;
};
} // namespace mips
} // namespace art
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index cc6dc7e..2c17f1c 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[T9] = &t9_;
+ gprs_[A0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = Mips64Context::kBadGprBase + SP;
t9_ = Mips64Context::kBadGprBase + T9;
+ arg0_ = 0;
}
void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 26fbcfe..84b1c9b 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -78,14 +78,20 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(A0, new_arg0_value);
+ }
+
private:
// Pointers to registers in the stack, initialized to null except for the special cases below.
uintptr_t* gprs_[kNumberOfGpuRegisters];
uint64_t* fprs_[kNumberOfFpuRegisters];
// Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
- // PC (as ra is required to be valid for single-frame deopt and must not be clobbered).
- uintptr_t sp_, t9_;
+ // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+ // also need the first argument for single-frame deopt.
+ uintptr_t sp_, t9_, arg0_;
};
+
} // namespace mips64
} // namespace art
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 7096c82..987ad60 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -29,9 +29,11 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[ESP] = &esp_;
+ gprs_[EAX] = &arg0_;
// Initialize registers with easy to spot debug values.
esp_ = X86Context::kBadGprBase + ESP;
eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters;
+ arg0_ = 0;
}
void X86Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c4a11d8..59beb12 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -44,6 +44,10 @@
eip_ = new_pc;
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(EAX, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
return gprs_[reg] != nullptr;
@@ -95,10 +99,10 @@
// Pointers to register locations. Values are initialized to null or the special registers below.
uintptr_t* gprs_[kNumberOfCpuRegisters];
uint32_t* fprs_[kNumberOfFloatRegisters];
- // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
+ // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat
// special in that it cannot be encoded normally as a register operand to an instruction (except
// in 64bit addressing modes).
- uintptr_t esp_, eip_;
+ uintptr_t esp_, eip_, arg0_;
};
} // namespace x86
} // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 029a296..f3b15c9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1695,7 +1695,7 @@
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 1fe2ef8..3dc7d71 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -29,9 +29,11 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[RSP] = &rsp_;
+ gprs_[RDI] = &arg0_;
// Initialize registers with easy to spot debug values.
rsp_ = X86_64Context::kBadGprBase + RSP;
rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+ arg0_ = 0;
}
void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 30bb9ec..f05b7f0 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -44,6 +44,10 @@
rip_ = new_pc;
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(RDI, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
return gprs_[reg] != nullptr;
@@ -82,10 +86,10 @@
// Pointers to register locations. Values are initialized to null or the special registers below.
uintptr_t* gprs_[kNumberOfCpuRegisters];
uint64_t* fprs_[kNumberOfFloatRegisters];
- // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+ // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat
// special in that it cannot be encoded normally as a register operand to an instruction (except
// in 64bit addressing modes).
- uintptr_t rsp_, rip_;
+ uintptr_t rsp_, rip_, arg0_;
};
} // namespace x86_64
} // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 861f802..2f438a3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1724,18 +1724,18 @@
* will long jump to the upcall with a special exception of -1.
*/
DEFINE_FUNCTION art_quick_deoptimize
- pushq %rsi // Entry point for a jump. Fake that we were called.
- // Use hidden arg.
+ pushq %rsi // Entry point for a jump. Fake that we were called.
+ // Use hidden arg.
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
- // Stack should be aligned now.
- movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
- call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+ // Stack should be aligned now.
+ movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
+ call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index d749664..dfd9fcd 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -22,13 +22,16 @@
#include "mirror/class-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/object-inl.h"
+#include "quick_exception_handler.h"
#include "stack.h"
#include "thread.h"
#include "verifier/method_verifier.h"
namespace art {
-NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+ ScopedQuickEntrypointChecks sqec(self);
+
if (VLOG_IS_ON(deopt)) {
LOG(INFO) << "Deopting:";
self->Dump(LOG(INFO));
@@ -39,19 +42,26 @@
self->QuickDeliverException();
}
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
- ScopedQuickEntrypointChecks sqec(self);
- artDeoptimizeImpl(self);
-}
-
extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
+
+ // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+ // specialized visitor that will show whether a method is Quick or Shadow.
+
// Before deoptimizing to interpreter, we must push the deoptimization context.
JValue return_value;
return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result.
self->PushDeoptimizationContext(return_value, false, self->GetException());
- artDeoptimizeImpl(self);
+
+ QuickExceptionHandler exception_handler(self, true);
+ exception_handler.DeoptimizeSingleFrame();
+ exception_handler.UpdateInstrumentationStack();
+ exception_handler.DeoptimizeSingleFrameArchDependentFixup();
+ // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+ // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+ // line.
+ exception_handler.DoLongJump(false);
}
} // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1302c5f..c2488cc 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -29,6 +29,7 @@
#include "mirror/method.h"
#include "mirror/object-inl.h"
#include "mirror/object_array-inl.h"
+#include "quick_exception_handler.h"
#include "runtime.h"
#include "scoped_thread_state_change.h"
#include "debugger.h"
@@ -646,27 +647,85 @@
if (method->IsAbstract()) {
ThrowAbstractMethodError(method);
return 0;
+ }
+
+ JValue tmp_value;
+ ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
+ StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+ const DexFile::CodeItem* code_item = method->GetCodeItem();
+ DCHECK(code_item != nullptr) << PrettyMethod(method);
+ ManagedStack fragment;
+
+ DCHECK(!method->IsNative()) << PrettyMethod(method);
+ uint32_t shorty_len = 0;
+ auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+ const char* shorty = non_proxy_method->GetShorty(&shorty_len);
+
+ JValue result;
+
+ if (deopt_frame != nullptr) {
+ // Coming from single-frame deopt.
+
+ if (kIsDebugBuild) {
+ // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
+ // of the call-stack) corresponds to the called method.
+ ShadowFrame* linked = deopt_frame;
+ while (linked->GetLink() != nullptr) {
+ linked = linked->GetLink();
+ }
+ CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " "
+ << PrettyMethod(linked->GetMethod());
+ }
+
+ if (VLOG_IS_ON(deopt)) {
+ // Print out the stack to verify that it was a single-frame deopt.
+ LOG(INFO) << "Continue-ing from deopt. Stack is:";
+ QuickExceptionHandler::DumpFramesWithType(self, true);
+ }
+
+ mirror::Throwable* pending_exception = nullptr;
+ self->PopDeoptimizationContext(&result, &pending_exception);
+
+ // Push a transition back into managed code onto the linked list in thread.
+ self->PushManagedStackFragment(&fragment);
+
+ // Ensure that the stack is still in order.
+ if (kIsDebugBuild) {
+ class DummyStackVisitor : public StackVisitor {
+ public:
+ explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_)
+ : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+
+ bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+ // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking
+ // logic. Just always say we want to continue.
+ return true;
+ }
+ };
+ DummyStackVisitor dsv(self);
+ dsv.WalkStack();
+ }
+
+ // Restore the exception that was pending before deoptimization then interpret the
+ // deoptimized frames.
+ if (pending_exception != nullptr) {
+ self->SetException(pending_exception);
+ }
+ interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
} else {
- DCHECK(!method->IsNative()) << PrettyMethod(method);
const char* old_cause = self->StartAssertNoThreadSuspension(
"Building interpreter shadow frame");
- const DexFile::CodeItem* code_item = method->GetCodeItem();
- DCHECK(code_item != nullptr) << PrettyMethod(method);
uint16_t num_regs = code_item->registers_size_;
void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
// No last shadow coming from quick.
ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory));
size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
- uint32_t shorty_len = 0;
- auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
- const char* shorty = non_proxy_method->GetShorty(&shorty_len);
BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
shadow_frame, first_arg_reg);
shadow_frame_builder.VisitArguments();
const bool needs_initialization =
method->IsStatic() && !method->GetDeclaringClass()->IsInitialized();
// Push a transition back into managed code onto the linked list in thread.
- ManagedStack fragment;
self->PushManagedStackFragment(&fragment);
self->PushShadowFrame(shadow_frame);
self->EndAssertNoThreadSuspension(old_cause);
@@ -681,24 +740,26 @@
return 0;
}
}
- JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
- // Pop transition.
- self->PopManagedStackFragment(fragment);
- // Request a stack deoptimization if needed
- ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
- if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
- // Push the context of the deoptimization stack so we can restore the return value and the
- // exception before executing the deoptimized frames.
- self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
-
- // Set special exception to cause deoptimization.
- self->SetException(Thread::GetDeoptimizationException());
- }
-
- // No need to restore the args since the method has already been run by the interpreter.
- return result.GetJ();
+ result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
}
+
+ // Pop transition.
+ self->PopManagedStackFragment(fragment);
+
+ // Request a stack deoptimization if needed
+ ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+ if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+ // Push the context of the deoptimization stack so we can restore the return value and the
+ // exception before executing the deoptimized frames.
+ self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+ // Set special exception to cause deoptimization.
+ self->SetException(Thread::GetDeoptimizationException());
+ }
+
+ // No need to restore the args since the method has already been run by the interpreter.
+ return result.GetJ();
}
// Visits arguments on the stack placing them into the args vector, Object* arguments are converted
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5c13e13..63f43cf 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -20,6 +20,7 @@
#include "art_method-inl.h"
#include "dex_instruction.h"
#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "entrypoints/runtime_asm_entrypoints.h"
#include "handle_scope-inl.h"
#include "mirror/class-inl.h"
@@ -36,8 +37,9 @@
: self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization),
method_tracing_active_(is_deoptimization ||
Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
- handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr),
- handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) {
+ handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0),
+ handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false),
+ handler_frame_depth_(kInvalidFrameDepth) {
}
// Finds catch handler.
@@ -260,19 +262,25 @@
// Prepares deoptimization.
class DeoptimizeStackVisitor FINAL : public StackVisitor {
public:
- DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler)
+ DeoptimizeStackVisitor(Thread* self,
+ Context* context,
+ QuickExceptionHandler* exception_handler,
+ bool single_frame)
SHARED_REQUIRES(Locks::mutator_lock_)
: StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
exception_handler_(exception_handler),
prev_shadow_frame_(nullptr),
- stacked_shadow_frame_pushed_(false) {
+ stacked_shadow_frame_pushed_(false),
+ single_frame_deopt_(single_frame),
+ single_frame_done_(false) {
}
bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
ArtMethod* method = GetMethod();
- if (method == nullptr) {
- // This is the upcall, we remember the frame and last pc so that we may long jump to them.
+ if (method == nullptr || single_frame_done_) {
+ // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
+ // and last pc so that we may long jump to them.
exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
if (!stacked_shadow_frame_pushed_) {
@@ -295,7 +303,13 @@
CHECK_EQ(GetFrameDepth(), 1U);
return true;
} else {
- return HandleDeoptimization(method);
+ HandleDeoptimization(method);
+ if (single_frame_deopt_ && !IsInInlinedFrame()) {
+ // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
+ exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
+ single_frame_done_ = true;
+ }
+ return true;
}
}
@@ -304,7 +318,7 @@
return static_cast<VRegKind>(kinds.at(reg * 2));
}
- bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
+ void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
const DexFile::CodeItem* code_item = m->GetCodeItem();
CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
uint16_t num_regs = code_item->registers_size_;
@@ -448,16 +462,20 @@
// Will be popped after the long jump after DeoptimizeStack(),
// right before interpreter::EnterInterpreterFromDeoptimize().
stacked_shadow_frame_pushed_ = true;
- GetThread()->PushStackedShadowFrame(new_frame,
- StackedShadowFrameType::kDeoptimizationShadowFrame);
+ GetThread()->PushStackedShadowFrame(
+ new_frame,
+ single_frame_deopt_
+ ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
+ : StackedShadowFrameType::kDeoptimizationShadowFrame);
}
prev_shadow_frame_ = new_frame;
- return true;
}
QuickExceptionHandler* const exception_handler_;
ShadowFrame* prev_shadow_frame_;
bool stacked_shadow_frame_pushed_;
+ const bool single_frame_deopt_;
+ bool single_frame_done_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
};
@@ -468,13 +486,46 @@
self_->DumpStack(LOG(INFO) << "Deoptimizing: ");
}
- DeoptimizeStackVisitor visitor(self_, context_, this);
+ DeoptimizeStackVisitor visitor(self_, context_, this, false);
visitor.WalkStack(true);
// Restore deoptimization exception
self_->SetException(Thread::GetDeoptimizationException());
}
+void QuickExceptionHandler::DeoptimizeSingleFrame() {
+ DCHECK(is_deoptimization_);
+
+ if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
+ LOG(INFO) << "Single-frame deopting:";
+ DumpFramesWithType(self_, true);
+ }
+
+ DeoptimizeStackVisitor visitor(self_, context_, this, true);
+ visitor.WalkStack(true);
+
+ // The PC needs to be that of the quick-to-interpreter bridge.
+ int32_t offset;
+ #ifdef __LP64__
+ offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
+ #else
+ offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
+ #endif
+ handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+ reinterpret_cast<uint8_t*>(self_) + offset);
+}
+
+void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
+ // Architecture-dependent work. This is to get the return address right for x86 and x86-64.
+
+ if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
+ // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
+ // change how longjump works.
+ handler_quick_frame_ = reinterpret_cast<ArtMethod**>(
+ reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*));
+ }
+}
+
// Unwinds all instrumentation stack frame prior to catch handler or upcall.
class InstrumentationStackVisitor : public StackVisitor {
public:
@@ -529,15 +580,67 @@
}
}
-void QuickExceptionHandler::DoLongJump() {
+void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
// Place context back on thread so it will be available when we continue.
self_->ReleaseLongJumpContext(context_);
context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_));
CHECK_NE(handler_quick_frame_pc_, 0u);
context_->SetPC(handler_quick_frame_pc_);
- context_->SmashCallerSaves();
+ context_->SetArg0(handler_quick_arg0_);
+ if (smash_caller_saves) {
+ context_->SmashCallerSaves();
+ }
context_->DoLongJump();
UNREACHABLE();
}
+// Prints out methods with their type of frame.
+class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
+ public:
+ DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+ show_details_(show_details) {}
+
+ bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+ ArtMethod* method = GetMethod();
+ if (show_details_) {
+ LOG(INFO) << "|> pc = " << std::hex << GetCurrentQuickFramePc();
+ LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame());
+ if (GetCurrentQuickFrame() != nullptr && method != nullptr) {
+ LOG(INFO) << "|> ret = " << std::hex << GetReturnPc();
+ }
+ }
+ if (method == nullptr) {
+ // Transition, do go on, we want to unwind over bridges, all the way.
+ if (show_details_) {
+ LOG(INFO) << "N <transition>";
+ }
+ return true;
+ } else if (method->IsRuntimeMethod()) {
+ if (show_details_) {
+ LOG(INFO) << "R " << PrettyMethod(method, true);
+ }
+ return true;
+ } else {
+ bool is_shadow = GetCurrentShadowFrame() != nullptr;
+ LOG(INFO) << (is_shadow ? "S" : "Q")
+ << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ")
+ << " "
+ << PrettyMethod(method, true);
+ return true; // Go on.
+ }
+ }
+
+ private:
+ bool show_details_;
+
+ DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor);
+};
+
+void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) {
+ DumpFramesWithTypeStackVisitor visitor(self, details);
+ visitor.WalkStack(true);
+}
+
} // namespace art
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 2e05c7e..89d6a25 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -49,6 +49,9 @@
// Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
// shadow frame that will be executed with the interpreter.
void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+ void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
+ void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
// Update the instrumentation stack by removing all methods that will be unwound
// by the exception being thrown.
void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -58,7 +61,7 @@
SHARED_REQUIRES(Locks::mutator_lock_);
// Long jump either to a catch handler or to the upcall.
- NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_);
+ NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_);
void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) {
handler_quick_frame_ = handler_quick_frame;
@@ -68,6 +71,10 @@
handler_quick_frame_pc_ = handler_quick_frame_pc;
}
+ void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) {
+ handler_quick_arg0_ = handler_quick_arg0;
+ }
+
ArtMethod* GetHandlerMethod() const {
return handler_method_;
}
@@ -92,6 +99,11 @@
handler_frame_depth_ = frame_depth;
}
+ // Walk the stack frames of the given thread, printing each non-runtime method along with its
+ // frame type. Helps to verify that single-frame deopt really only deopted one frame.
+ static void DumpFramesWithType(Thread* self, bool details = false)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
private:
Thread* const self_;
Context* const context_;
@@ -103,6 +115,8 @@
ArtMethod** handler_quick_frame_;
// PC to branch to for the handler.
uintptr_t handler_quick_frame_pc_;
+ // The value for argument 0.
+ uintptr_t handler_quick_arg0_;
// The handler method to report to the debugger.
ArtMethod* handler_method_;
// The handler's dex PC, zero implies an uncaught exception.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5bf895e..82e6fb0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -250,10 +250,16 @@
tlsPtr_.stacked_shadow_frame_record = record;
}
-ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) {
+ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) {
StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
- DCHECK(record != nullptr);
- DCHECK_EQ(record->GetType(), type);
+ if (must_be_present) {
+ DCHECK(record != nullptr);
+ DCHECK_EQ(record->GetType(), type);
+ } else {
+ if (record == nullptr || record->GetType() != type) {
+ return nullptr;
+ }
+ }
tlsPtr_.stacked_shadow_frame_record = record->GetLink();
ShadowFrame* shadow_frame = record->GetShadowFrame();
delete record;
diff --git a/runtime/thread.h b/runtime/thread.h
index 11f2e28..d21644d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -108,7 +108,8 @@
enum class StackedShadowFrameType {
kShadowFrameUnderConstruction,
- kDeoptimizationShadowFrame
+ kDeoptimizationShadowFrame,
+ kSingleFrameDeoptimizationShadowFrame
};
static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
@@ -843,7 +844,7 @@
void AssertHasDeoptimizationContext()
SHARED_REQUIRES(Locks::mutator_lock_);
void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
- ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
+ ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true);
// For debugger, find the shadow frame that corresponds to a frame id.
// Or return null if there is none.
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index a746664..f06c250 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -249,6 +249,25 @@
array[Integer.MAX_VALUE - 998] = 1;
}
+ /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before)
+ /// CHECK: BoundsCheck
+ /// CHECK: ArraySet
+ /// CHECK: BoundsCheck
+ /// CHECK: ArraySet
+
+ /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after)
+ /// CHECK: Deoptimize
+
+ static void constantIndexing6(int[] array) {
+ array[3] = 1;
+ array[4] = 1;
+ }
+
+ // A helper into which the actual throwing function (constantIndexing6) should be inlined.
+ static void constantIndexingForward6(int[] array) {
+ constantIndexing6(array);
+ }
+
/// CHECK-START: void Main.loopPattern1(int[]) BCE (before)
/// CHECK: BoundsCheck
/// CHECK: ArraySet
@@ -602,7 +621,12 @@
// This will cause AIOOBE.
constantIndexing2(new int[3]);
} catch (ArrayIndexOutOfBoundsException e) {
- return 99;
+ try {
+ // This will cause AIOOBE.
+ constantIndexingForward6(new int[3]);
+ } catch (ArrayIndexOutOfBoundsException e2) {
+ return 99;
+ }
}
return 0;
}