ART: Add stack-dump mode for contention logging

Add a second threshold to contention logging. If this threshold
is reached, log the owner and the contender thread to the logcat.

Requires a bit of refactor in the contention logging code to hold
the locks correctly (different requirements for owner and contender).
Add -Xstackdumplockprofthreshold:ms to control the threshold value.
As with general contention logging, default to 0 = off. Use ThreadStress
with --locks-only to get test coverage.

Bug: 62353392
Test: m test-art-host
Change-Id: I1b5a5130c4f908497ac06464130844f5ca42a5fb
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 4d309bf..940afc8 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -73,9 +73,12 @@
  */
 
 uint32_t Monitor::lock_profiling_threshold_ = 0;
+uint32_t Monitor::stack_dump_lock_profiling_threshold_ = 0;
 
-void Monitor::Init(uint32_t lock_profiling_threshold) {
+void Monitor::Init(uint32_t lock_profiling_threshold,
+                   uint32_t stack_dump_lock_profiling_threshold) {
   lock_profiling_threshold_ = lock_profiling_threshold;
+  stack_dump_lock_profiling_threshold_ = stack_dump_lock_profiling_threshold;
 }
 
 Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
@@ -411,19 +414,71 @@
           if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
             // Reacquire mutator_lock_ for logging.
             ScopedObjectAccess soa(self);
-            // Acquire thread-list lock to find thread and keep it from dying until we're done.
-            MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
 
-            // Re-find the owner in case the thread got killed.
-            Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
-                original_owner_thread_id);
+            bool owner_alive = false;
+            pid_t original_owner_tid = 0;
+            std::string original_owner_name;
 
-            if (original_owner != nullptr) {
-              pid_t original_owner_tid = original_owner->GetTid();
-              std::string original_owner_name;
-              original_owner->GetThreadName(original_owner_name);
+            const bool should_dump_stacks = stack_dump_lock_profiling_threshold_ > 0 &&
+                wait_ms > stack_dump_lock_profiling_threshold_;
+            std::string owner_stack_dump;
 
-              if (wait_ms > kLongWaitMs && owners_method != nullptr) {
+            // Acquire thread-list lock to find thread and keep it from dying until we've got all
+            // the info we need.
+            {
+              MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
+
+              // Re-find the owner in case the thread got killed.
+              Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
+                  original_owner_thread_id);
+
+              if (original_owner != nullptr) {
+                owner_alive = true;
+                original_owner_tid = original_owner->GetTid();
+                original_owner->GetThreadName(original_owner_name);
+
+                if (should_dump_stacks) {
+                  // Very long contention. Dump stacks.
+                  struct CollectStackTrace : public Closure {
+                    void Run(art::Thread* thread) OVERRIDE
+                        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+                      thread->DumpJavaStack(oss);
+                    }
+
+                    std::ostringstream oss;
+                  };
+                  CollectStackTrace owner_trace;
+                  original_owner->RequestSynchronousCheckpoint(&owner_trace);
+                  owner_stack_dump = owner_trace.oss.str();
+                }
+              }
+              // This is all the data we need. Now drop the thread-list lock, it's OK for the
+              // owner to go away now.
+            }
+
+            // If we found the owner (and thus have owner data), go and log now.
+            if (owner_alive) {
+              // Give the detailed traces for really long contention.
+              if (should_dump_stacks) {
+                // This must be here (and not above) because we cannot hold the thread-list lock
+                // while running the checkpoint.
+                std::ostringstream self_trace_oss;
+                self->DumpJavaStack(self_trace_oss);
+
+                uint32_t pc;
+                ArtMethod* m = self->GetCurrentMethod(&pc);
+
+                LOG(WARNING) << "Long "
+                    << PrettyContentionInfo(original_owner_name,
+                                            original_owner_tid,
+                                            owners_method,
+                                            owners_dex_pc,
+                                            num_waiters)
+                    << " in " << ArtMethod::PrettyMethod(m) << " for "
+                    << PrettyDuration(MsToNs(wait_ms)) << "\n"
+                    << "Current owner stack:\n" << owner_stack_dump
+                    << "Contender stack:\n" << self_trace_oss.str();
+              } else if (wait_ms > kLongWaitMs && owners_method != nullptr) {
                 uint32_t pc;
                 ArtMethod* m = self->GetCurrentMethod(&pc);
                 // TODO: We should maybe check that original_owner is still a live thread.
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 752b2a0..09faeba 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -55,7 +55,7 @@
 
   ~Monitor();
 
-  static void Init(uint32_t lock_profiling_threshold);
+  static void Init(uint32_t lock_profiling_threshold, uint32_t stack_dump_lock_profiling_threshold);
 
   // Return the thread id of the lock owner or 0 when there is no owner.
   static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
@@ -273,6 +273,7 @@
   ALWAYS_INLINE static void AtraceMonitorUnlock();
 
   static uint32_t lock_profiling_threshold_;
+  static uint32_t stack_dump_lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index b1eb506..1d524fd 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -239,6 +239,9 @@
       .Define("-Xlockprofthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::LockProfThreshold)
+      .Define("-Xstackdumplockprofthreshold:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::StackDumpLockProfThreshold)
       .Define("-Xusetombstonedtraces")
           .WithValue(true)
           .IntoKey(M::UseTombstonedTraces)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 46b4392..bf9e405 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1064,7 +1064,8 @@
   oat_file_manager_ = new OatFileManager;
 
   Thread::SetSensitiveThreadHook(runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
-  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold));
+  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold),
+                runtime_options.GetOrDefault(Opt::StackDumpLockProfThreshold));
 
   boot_class_path_string_ = runtime_options.ReleaseOrDefault(Opt::BootClassPath);
   class_path_string_ = runtime_options.ReleaseOrDefault(Opt::ClassPath);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 3d23e20..09a200a 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -100,6 +100,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        StackDumpLockProfThreshold)
 RUNTIME_OPTIONS_KEY (bool,                UseTombstonedTraces, false)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
index 772faf6..9eaab24 100644
--- a/test/004-ThreadStress/expected.txt
+++ b/test/004-ThreadStress/expected.txt
@@ -9,3 +9,14 @@
 Finishing worker
 Finishing worker
 Finishing worker
+JNI_OnLoad called
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
diff --git a/test/004-ThreadStress/run b/test/004-ThreadStress/run
index d2e7fae..3851d73 100755
--- a/test/004-ThreadStress/run
+++ b/test/004-ThreadStress/run
@@ -16,3 +16,7 @@
 
 # Enable lock contention logging.
 ${RUN} --runtime-option -Xlockprofthreshold:10 "${@}"
+
+# Run locks-only mode with stack-dump lock profiling. Reduce the number of total operations from
+# the default 1000 to 100.
+${RUN} --runtime-option -Xlockprofthreshold:10 --runtime-option -Xstackdumplockprofthreshold:20 "${@}" Main --locks-only -o 100
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index 5cae398..95d1649 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -232,7 +232,7 @@
         public boolean perform() {
             synchronized (lock) {
                 try {
-                    Thread.sleep((int)(Math.random()*10));
+                    Thread.sleep((int)(Math.random() * 50 + 50));
                 } catch (InterruptedException ignored) {
                 }
             }