Speed up single-stepping
During single-stepping sequence, we need to deoptimize everything when we
register a single-step event and undeoptimize everything when it is done. This
causes a slow pattern where we continuously deoptimize-undeoptimize everything
for each single-step.
This CL introduces a special handling of single-step undeoptimization. We now
delay the undeoptimization to the next resume (one thread or all threads) or
the end of the debugging session. Indeed, a single-step event registration is
always followed by a resume command.
At the "resume" point, we know if a single-step event is registered and if we
really need to undeoptimize. At the "registration" point, we know we did not
undeoptimize everything so we don't need to deoptimize everything again.
Therefore, in a sequence of single-steps, we only do a full deoptimization for
the first single-step and a full undeoptimization for the last single-step.
We update logs at deoptimization points so we can track more precisely. Note
they are verbose logs that still must be enabled with -verbose:jdwp option.
We also make some improvement inside instrumentation:
* updates Instrumentation::ShouldNotifyMethodEnterExitEvents to comply with its
name.
* compute frame id only once when looking for the corresponding instrumentation
frame.
* compute the OatMethod once in ClassLinker::GetPortableOatCodeFor to avoid
looking for it again.
Bug: 13577964
Change-Id: If6fa198a676b515cd474b8c4d7bf7ef3626f2dc7
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 78b7cc0..8af5473 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1620,9 +1620,10 @@
if (method->IsProxyMethod()) {
return GetPortableProxyInvokeHandler();
}
- const void* result = GetOatMethodFor(method).GetPortableCode();
+ const OatFile::OatMethod oat_method = GetOatMethodFor(method);
+ const void* result = oat_method.GetPortableCode();
if (result == nullptr) {
- if (GetOatMethodFor(method).GetQuickCode() == nullptr) {
+ if (oat_method.GetQuickCode() == nullptr) {
// No code? You must mean to go into the interpreter.
result = GetPortableToInterpreterBridge();
} else {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 514ad4c..1f59617 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -211,6 +211,7 @@
Mutex* Dbg::deoptimization_lock_ = nullptr;
std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
size_t Dbg::full_deoptimization_event_count_ = 0;
+size_t Dbg::delayed_full_undeoptimization_count_ = 0;
// Breakpoints.
static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
@@ -625,6 +626,7 @@
MutexLock mu(Thread::Current(), *deoptimization_lock_);
CHECK_EQ(deoptimization_requests_.size(), 0U);
CHECK_EQ(full_deoptimization_event_count_, 0U);
+ CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
}
Runtime* runtime = Runtime::Current();
@@ -667,6 +669,7 @@
MutexLock mu(Thread::Current(), *deoptimization_lock_);
deoptimization_requests_.clear();
full_deoptimization_event_count_ = 0U;
+ delayed_full_undeoptimization_count_ = 0U;
}
runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
instrumentation::Instrumentation::kMethodEntered |
@@ -2580,20 +2583,24 @@
LOG(WARNING) << "Ignoring empty deoptimization request.";
break;
case DeoptimizationRequest::kFullDeoptimization:
- VLOG(jdwp) << "Deoptimize the world";
+ VLOG(jdwp) << "Deoptimize the world ...";
instrumentation->DeoptimizeEverything();
+ VLOG(jdwp) << "Deoptimize the world DONE";
break;
case DeoptimizationRequest::kFullUndeoptimization:
- VLOG(jdwp) << "Undeoptimize the world";
+ VLOG(jdwp) << "Undeoptimize the world ...";
instrumentation->UndeoptimizeEverything();
+ VLOG(jdwp) << "Undeoptimize the world DONE";
break;
case DeoptimizationRequest::kSelectiveDeoptimization:
- VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method);
+ VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " ...";
instrumentation->Deoptimize(request.method);
+ VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " DONE";
break;
case DeoptimizationRequest::kSelectiveUndeoptimization:
- VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method);
+ VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " ...";
instrumentation->Undeoptimize(request.method);
+ VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " DONE";
break;
default:
LOG(FATAL) << "Unsupported deoptimization request kind " << request.kind;
@@ -2601,17 +2608,43 @@
}
}
+void Dbg::DelayFullUndeoptimization() {
+ MutexLock mu(Thread::Current(), *deoptimization_lock_);
+ ++delayed_full_undeoptimization_count_;
+ DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
+}
+
+void Dbg::ProcessDelayedFullUndeoptimizations() {
+ // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
+ {
+ MutexLock mu(Thread::Current(), *deoptimization_lock_);
+ while (delayed_full_undeoptimization_count_ > 0) {
+ DeoptimizationRequest req;
+ req.kind = DeoptimizationRequest::kFullUndeoptimization;
+ req.method = nullptr;
+ RequestDeoptimizationLocked(req);
+ --delayed_full_undeoptimization_count_;
+ }
+ }
+ ManageDeoptimization();
+}
+
void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
if (req.kind == DeoptimizationRequest::kNothing) {
// Nothing to do.
return;
}
MutexLock mu(Thread::Current(), *deoptimization_lock_);
+ RequestDeoptimizationLocked(req);
+}
+
+void Dbg::RequestDeoptimizationLocked(const DeoptimizationRequest& req) {
switch (req.kind) {
case DeoptimizationRequest::kFullDeoptimization: {
DCHECK(req.method == nullptr);
if (full_deoptimization_event_count_ == 0) {
- VLOG(jdwp) << "Request full deoptimization";
+ VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+ << " for full deoptimization";
deoptimization_requests_.push_back(req);
}
++full_deoptimization_event_count_;
@@ -2622,20 +2655,23 @@
DCHECK_GT(full_deoptimization_event_count_, 0U);
--full_deoptimization_event_count_;
if (full_deoptimization_event_count_ == 0) {
- VLOG(jdwp) << "Request full undeoptimization";
+ VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+ << " for full undeoptimization";
deoptimization_requests_.push_back(req);
}
break;
}
case DeoptimizationRequest::kSelectiveDeoptimization: {
DCHECK(req.method != nullptr);
- VLOG(jdwp) << "Request deoptimization of " << PrettyMethod(req.method);
+ VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+ << " for deoptimization of " << PrettyMethod(req.method);
deoptimization_requests_.push_back(req);
break;
}
case DeoptimizationRequest::kSelectiveUndeoptimization: {
DCHECK(req.method != nullptr);
- VLOG(jdwp) << "Request undeoptimization of " << PrettyMethod(req.method);
+ VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+ << " for undeoptimization of " << PrettyMethod(req.method);
deoptimization_requests_.push_back(req);
break;
}
@@ -2663,7 +2699,9 @@
const ThreadState old_state = self->SetStateUnsafe(kRunnable);
{
MutexLock mu(self, *deoptimization_lock_);
+ size_t req_index = 0;
for (const DeoptimizationRequest& request : deoptimization_requests_) {
+ VLOG(jdwp) << "Process deoptimization request #" << req_index++;
ProcessDeoptimizationRequest(request);
}
deoptimization_requests_.clear();
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 23c9c6a..f9478c6 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -432,6 +432,13 @@
LOCKS_EXCLUDED(deoptimization_lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ // Support delayed full undeoptimization requests. This is currently only used for single-step
+ // events.
+ static void DelayFullUndeoptimization() LOCKS_EXCLUDED(deoptimization_lock_);
+ static void ProcessDelayedFullUndeoptimizations()
+ LOCKS_EXCLUDED(deoptimization_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
// Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
// request and finally resumes all threads.
static void ManageDeoptimization()
@@ -541,6 +548,10 @@
static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+ static void RequestDeoptimizationLocked(const DeoptimizationRequest& req)
+ EXCLUSIVE_LOCKS_REQUIRED(deoptimization_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
static Mutex* alloc_tracker_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(alloc_tracker_lock_);
@@ -562,6 +573,10 @@
// undeoptimize when the last event is unregistered (when the counter is set to 0).
static size_t full_deoptimization_event_count_ GUARDED_BY(deoptimization_lock_);
+ // Count the number of full undeoptimization requests delayed to next resume or end of debug
+ // session.
+ static size_t delayed_full_undeoptimization_count_ GUARDED_BY(deoptimization_lock_);
+
DISALLOW_COPY_AND_ASSIGN(Dbg);
};
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 525e2b3..5233462 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -255,7 +255,7 @@
visitor.WalkStack(true);
CHECK_EQ(visitor.dex_pcs_.size(), thread->GetInstrumentationStack()->size());
- if (!instrumentation->ShouldNotifyMethodEnterExitEvents()) {
+ if (instrumentation->ShouldNotifyMethodEnterExitEvents()) {
// Create method enter events for all methods currently on the thread's stack. We only do this
// if no debugger is attached to prevent from posting events twice.
typedef std::deque<InstrumentationStackFrame>::const_reverse_iterator It;
@@ -302,8 +302,9 @@
}
bool removed_stub = false;
// TODO: make this search more efficient?
- for (InstrumentationStackFrame instrumentation_frame : *instrumentation_stack_) {
- if (instrumentation_frame.frame_id_ == GetFrameId()) {
+ const size_t frameId = GetFrameId();
+ for (const InstrumentationStackFrame& instrumentation_frame : *instrumentation_stack_) {
+ if (instrumentation_frame.frame_id_ == frameId) {
if (kVerboseInstrumentation) {
LOG(INFO) << " Removing exit stub in " << DescribeLocation();
}
@@ -313,7 +314,7 @@
CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
}
SetReturnPc(instrumentation_frame.return_pc_);
- if (!instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
+ if (instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
// Create the method exit events. As the methods didn't really exit the result is 0.
// We only do this if no debugger is attached to prevent from posting events twice.
instrumentation_->MethodExitEvent(thread_, instrumentation_frame.this_object_, m,
@@ -439,7 +440,7 @@
// We're already set.
return;
}
- Thread* self = Thread::Current();
+ Thread* const self = Thread::Current();
Runtime* runtime = Runtime::Current();
Locks::thread_list_lock_->AssertNotHeld(self);
if (desired_level > 0) {
@@ -451,7 +452,7 @@
}
runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
instrumentation_stubs_installed_ = true;
- MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+ MutexLock mu(self, *Locks::thread_list_lock_);
runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this);
} else {
interpreter_stubs_installed_ = false;
@@ -657,7 +658,7 @@
// Indicates if instrumentation should notify method enter/exit events to the listeners.
bool Instrumentation::ShouldNotifyMethodEnterExitEvents() const {
- return deoptimization_enabled_ || interpreter_stubs_installed_;
+ return !deoptimization_enabled_ && !interpreter_stubs_installed_;
}
void Instrumentation::DeoptimizeEverything() {
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 9b3ea2e..12370a4 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -248,7 +248,16 @@
Dbg::UnconfigureStep(pMod->step.threadId);
}
}
- if (NeedsFullDeoptimization(pEvent->eventKind)) {
+ if (pEvent->eventKind == EK_SINGLE_STEP) {
+ // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
+ // loop between each single-step. In an IDE, this would happen each time the user clicks on the
+ // "single-step" button. Here we delay the full undeoptimization to the next resume
+ // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
+ // runtime shutdown).
+ // Therefore, in a single-stepping sequence, only the first single-step will trigger a full
+ // deoptimization and only the last single-step will trigger a full undeoptimization.
+ Dbg::DelayFullUndeoptimization();
+ } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
CHECK(req.method == nullptr);
req.kind = DeoptimizationRequest::kFullUndeoptimization;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c2a2b54..3e8b697 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -291,6 +291,7 @@
*/
static JdwpError VM_Resume(JdwpState*, Request&, ExpandBuf*)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ Dbg::ProcessDelayedFullUndeoptimizations();
Dbg::ResumeVM();
return ERR_NONE;
}
@@ -980,6 +981,8 @@
return ERR_NONE;
}
+ Dbg::ProcessDelayedFullUndeoptimizations();
+
Dbg::ResumeThread(thread_id);
return ERR_NONE;
}
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 5fc0228..aeb4016 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -332,6 +332,8 @@
CHECK(event_list_ == NULL);
}
+ Dbg::ProcessDelayedFullUndeoptimizations();
+
/*
* Should not have one of these in progress. If the debugger went away
* mid-request, though, we could see this.