Speed up single-stepping

During a single-stepping sequence, we need to deoptimize everything when we
register a single-step event and undeoptimize everything when it completes.
This causes a slow pattern where we continuously deoptimize and undeoptimize
everything for each single-step.

This CL introduces special handling of single-step undeoptimization. We now
delay the undeoptimization until the next resume (of one thread or of all
threads) or the end of the debugging session. This works because a single-step
event registration is always followed by a resume command.
At the "resume" point, we know whether a single-step event is registered and
whether we really need to undeoptimize. At the "registration" point, we know
we have not undeoptimized everything yet, so we do not need to deoptimize
everything again.
Therefore, in a sequence of single-steps, we only do a full deoptimization for
the first single-step and a full undeoptimization for the last one.
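
To make the bookkeeping concrete, here is a sketch of the intended call
sequence as seen from the JDWP layer. The OnSingleStep*/OnResume wrappers are
hypothetical and only show when the Dbg entry points are meant to be called;
the real callers presumably live in the JDWP event and command handling code.

  // Hypothetical callers (illustration only). The Dbg functions and the
  // DeoptimizationRequest fields are the ones touched by this CL.
  void OnSingleStepRegistered() {
    DeoptimizationRequest req;
    req.kind = DeoptimizationRequest::kFullDeoptimization;
    req.method = nullptr;
    // Only the first single-step of a sequence actually queues a full
    // deoptimization; later ones just bump full_deoptimization_event_count_.
    Dbg::RequestDeoptimization(req);
    Dbg::ManageDeoptimization();
  }

  void OnSingleStepDone() {
    // Do not queue a full undeoptimization now: just remember that one is owed.
    Dbg::DelayFullUndeoptimization();
  }

  void OnResume() {
    // Turn each delayed undeoptimization into a kFullUndeoptimization request
    // and process it. If a new single-step was registered before this resume,
    // full_deoptimization_event_count_ stays above zero and nothing is
    // undeoptimized; otherwise the world is undeoptimized here.
    Dbg::ProcessDelayedFullUndeoptimizations();
  }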

We also update the logs at deoptimization points so we can track them more
precisely. Note that these are verbose logs which still must be enabled with
the -verbose:jdwp option.

We also make some improvements inside instrumentation:
* update Instrumentation::ShouldNotifyMethodEnterExitEvents so that it complies
  with its name.
* compute the frame id only once when looking for the corresponding
  instrumentation frame (see the sketch after this list).
* compute the OatMethod only once in ClassLinker::GetPortableOatCodeFor to
  avoid looking it up again.
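
The two "compute ... once" items follow the same hoist-and-reuse pattern. Below
is a minimal sketch of the frame-id case; the helper and field names
(GetFrameId, instrumentation_stack_, frame_id_) are assumptions used for
illustration, not the exact instrumentation code touched by this CL.

  // Sketch only: compute the frame id once, before the search, instead of
  // recomputing it for every candidate instrumentation frame.
  const size_t frame_id = GetFrameId();  // assumed StackVisitor-style helper
  for (const instrumentation::InstrumentationStackFrame& frame : *instrumentation_stack_) {
    if (frame.frame_id_ == frame_id) {
      // Found the instrumentation frame corresponding to the current frame.
      return &frame;
    }
  }
  return nullptr;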

Bug: 13577964
Change-Id: If6fa198a676b515cd474b8c4d7bf7ef3626f2dc7
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 514ad4c..1f59617 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -211,6 +211,7 @@
 Mutex* Dbg::deoptimization_lock_ = nullptr;
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
+size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
@@ -625,6 +626,7 @@
     MutexLock mu(Thread::Current(), *deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
+    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
   }
 
   Runtime* runtime = Runtime::Current();
@@ -667,6 +669,7 @@
       MutexLock mu(Thread::Current(), *deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
+      delayed_full_undeoptimization_count_ = 0U;
     }
     runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
                                                   instrumentation::Instrumentation::kMethodEntered |
@@ -2580,20 +2583,24 @@
       LOG(WARNING) << "Ignoring empty deoptimization request.";
       break;
     case DeoptimizationRequest::kFullDeoptimization:
-      VLOG(jdwp) << "Deoptimize the world";
+      VLOG(jdwp) << "Deoptimize the world ...";
       instrumentation->DeoptimizeEverything();
+      VLOG(jdwp) << "Deoptimize the world DONE";
       break;
     case DeoptimizationRequest::kFullUndeoptimization:
-      VLOG(jdwp) << "Undeoptimize the world";
+      VLOG(jdwp) << "Undeoptimize the world ...";
       instrumentation->UndeoptimizeEverything();
+      VLOG(jdwp) << "Undeoptimize the world DONE";
       break;
     case DeoptimizationRequest::kSelectiveDeoptimization:
-      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method);
+      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " ...";
       instrumentation->Deoptimize(request.method);
+      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " DONE";
       break;
     case DeoptimizationRequest::kSelectiveUndeoptimization:
-      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method);
+      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " ...";
       instrumentation->Undeoptimize(request.method);
+      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " DONE";
       break;
     default:
       LOG(FATAL) << "Unsupported deoptimization request kind " << request.kind;
@@ -2601,17 +2608,43 @@
   }
 }
 
+void Dbg::DelayFullUndeoptimization() {
+  MutexLock mu(Thread::Current(), *deoptimization_lock_);
+  ++delayed_full_undeoptimization_count_;
+  DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
+}
+
+void Dbg::ProcessDelayedFullUndeoptimizations() {
+  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
+  {
+    MutexLock mu(Thread::Current(), *deoptimization_lock_);
+    while (delayed_full_undeoptimization_count_ > 0) {
+      DeoptimizationRequest req;
+      req.kind = DeoptimizationRequest::kFullUndeoptimization;
+      req.method = nullptr;
+      RequestDeoptimizationLocked(req);
+      --delayed_full_undeoptimization_count_;
+    }
+  }
+  ManageDeoptimization();
+}
+
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.kind == DeoptimizationRequest::kNothing) {
     // Nothing to do.
     return;
   }
   MutexLock mu(Thread::Current(), *deoptimization_lock_);
+  RequestDeoptimizationLocked(req);
+}
+
+void Dbg::RequestDeoptimizationLocked(const DeoptimizationRequest& req) {
   switch (req.kind) {
     case DeoptimizationRequest::kFullDeoptimization: {
       DCHECK(req.method == nullptr);
       if (full_deoptimization_event_count_ == 0) {
-        VLOG(jdwp) << "Request full deoptimization";
+        VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                   << " for full deoptimization";
         deoptimization_requests_.push_back(req);
       }
       ++full_deoptimization_event_count_;
@@ -2622,20 +2655,23 @@
       DCHECK_GT(full_deoptimization_event_count_, 0U);
       --full_deoptimization_event_count_;
       if (full_deoptimization_event_count_ == 0) {
-        VLOG(jdwp) << "Request full undeoptimization";
+        VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                   << " for full undeoptimization";
         deoptimization_requests_.push_back(req);
       }
       break;
     }
     case DeoptimizationRequest::kSelectiveDeoptimization: {
       DCHECK(req.method != nullptr);
-      VLOG(jdwp) << "Request deoptimization of " << PrettyMethod(req.method);
+      VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                 << " for deoptimization of " << PrettyMethod(req.method);
       deoptimization_requests_.push_back(req);
       break;
     }
     case DeoptimizationRequest::kSelectiveUndeoptimization: {
       DCHECK(req.method != nullptr);
-      VLOG(jdwp) << "Request undeoptimization of " << PrettyMethod(req.method);
+      VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                 << " for undeoptimization of " << PrettyMethod(req.method);
       deoptimization_requests_.push_back(req);
       break;
     }
@@ -2663,7 +2699,9 @@
   const ThreadState old_state = self->SetStateUnsafe(kRunnable);
   {
     MutexLock mu(self, *deoptimization_lock_);
+    size_t req_index = 0;
     for (const DeoptimizationRequest& request : deoptimization_requests_) {
+      VLOG(jdwp) << "Process deoptimization request #" << req_index++;
       ProcessDeoptimizationRequest(request);
     }
     deoptimization_requests_.clear();