ANR: Print threads in deterministic order.

Print the interesting threads first, in case the ANR report gets trimmed.

The order is: the main thread first, then threads blocked waiting on a lock,
then threads holding locks, then all remaining threads.

Bug: 189881220
Test: Manually trigger ANR and check output.
Change-Id: I49ed525fdaa77d818e78dc7862308f8389463ba8
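
For context, the priority is decided during the stack walk below: Thread::DumpJavaStack
returns kMain for the JIT-sensitive thread (which ART uses to identify the main thread),
kBlocked if the visitor counted any monitor the thread is waiting on, kLocked if it
counted any lock the thread holds, and kDefault otherwise. The following is a minimal
standalone sketch of that decision only; the ClassifyThread helper and its parameters
are illustrative and not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

enum class DumpOrder : uint8_t { kMain, kBlocked, kLocked, kDefault };

// Decide how early a thread's dump should appear in the ANR report.
DumpOrder ClassifyThread(bool is_main_thread, size_t num_blocked, size_t num_locked) {
  if (is_main_thread) {
    return DumpOrder::kMain;     // The main thread is always printed first.
  } else if (num_blocked > 0) {
    return DumpOrder::kBlocked;  // Waiting on a monitor held by someone else.
  } else if (num_locked > 0) {
    return DumpOrder::kLocked;   // Holding a lock; possibly the one others wait on.
  } else {
    return DumpOrder::kDefault;  // Probably uninteresting for the ANR.
  }
}

int main() {
  // A worker holding one lock sorts after blocked threads but before idle ones.
  DumpOrder order = ClassifyThread(/*is_main_thread=*/false,
                                   /*num_blocked=*/0,
                                   /*num_locked=*/1);
  std::cout << static_cast<int>(order) << '\n';  // Prints 2 (kLocked).
  return 0;
}
```
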
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8e8e45f..547d638 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1397,15 +1397,19 @@
   tls32_.num_name_readers.fetch_sub(1 /* at least memory_order_release */);
 }
 
-void Thread::Dump(std::ostream& os, bool dump_native_stack, bool force_dump_stack) const {
+Thread::DumpOrder Thread::Dump(std::ostream& os,
+                               bool dump_native_stack,
+                               bool force_dump_stack) const {
   DumpState(os);
-  DumpStack(os, dump_native_stack, force_dump_stack);
+  return DumpStack(os, dump_native_stack, force_dump_stack);
 }
 
-void Thread::Dump(std::ostream& os, unwindstack::AndroidLocalUnwinder& unwinder,
-                  bool dump_native_stack, bool force_dump_stack) const {
+Thread::DumpOrder Thread::Dump(std::ostream& os,
+                               unwindstack::AndroidLocalUnwinder& unwinder,
+                               bool dump_native_stack,
+                               bool force_dump_stack) const {
   DumpState(os);
-  DumpStack(os, unwinder, dump_native_stack, force_dump_stack);
+  return DumpStack(os, unwinder, dump_native_stack, force_dump_stack);
 }
 
 ObjPtr<mirror::String> Thread::GetThreadName() const {
@@ -2207,11 +2211,13 @@
         UNREACHABLE();
     }
     PrintObject(obj, msg, owner_tid);
+    num_blocked++;
   }
   void VisitLockedObject(ObjPtr<mirror::Object> obj)
       override
       REQUIRES_SHARED(Locks::mutator_lock_) {
     PrintObject(obj, "  - locked ", ThreadList::kInvalidThreadId);
+    num_locked++;
   }
 
   void PrintObject(ObjPtr<mirror::Object> obj,
@@ -2245,6 +2251,8 @@
   ArtMethod* last_method;
   int last_line_number;
   size_t repetition_count;
+  size_t num_blocked = 0;
+  size_t num_locked = 0;
 };
 
 static bool ShouldShowNativeStack(const Thread* thread)
@@ -2276,7 +2284,9 @@
   return current_method != nullptr && current_method->IsNative();
 }
 
-void Thread::DumpJavaStack(std::ostream& os, bool check_suspended, bool dump_locks) const {
+Thread::DumpOrder Thread::DumpJavaStack(std::ostream& os,
+                                        bool check_suspended,
+                                        bool dump_locks) const {
   // Dumping the Java stack involves the verifier for locks. The verifier operates under the
   // assumption that there is no exception pending on entry. Thus, stash any pending exception.
   // Thread::Current() instead of this in case a thread is dumping the stack of another suspended
@@ -2287,19 +2297,28 @@
   StackDumpVisitor dumper(os, const_cast<Thread*>(this), context.get(),
                           !tls32_.throwing_OutOfMemoryError, check_suspended, dump_locks);
   dumper.WalkStack();
+  if (IsJitSensitiveThread()) {
+    return DumpOrder::kMain;
+  } else if (dumper.num_blocked > 0) {
+    return DumpOrder::kBlocked;
+  } else if (dumper.num_locked > 0) {
+    return DumpOrder::kLocked;
+  } else {
+    return DumpOrder::kDefault;
+  }
 }
 
-void Thread::DumpStack(std::ostream& os,
-                       bool dump_native_stack,
-                       bool force_dump_stack) const {
+Thread::DumpOrder Thread::DumpStack(std::ostream& os,
+                                    bool dump_native_stack,
+                                    bool force_dump_stack) const {
   unwindstack::AndroidLocalUnwinder unwinder;
-  DumpStack(os, unwinder, dump_native_stack, force_dump_stack);
+  return DumpStack(os, unwinder, dump_native_stack, force_dump_stack);
 }
 
-void Thread::DumpStack(std::ostream& os,
-                       unwindstack::AndroidLocalUnwinder& unwinder,
-                       bool dump_native_stack,
-                       bool force_dump_stack) const {
+Thread::DumpOrder Thread::DumpStack(std::ostream& os,
+                                    unwindstack::AndroidLocalUnwinder& unwinder,
+                                    bool dump_native_stack,
+                                    bool force_dump_stack) const {
   // TODO: we call this code when dying but may not have suspended the thread ourself. The
   //       IsSuspended check is therefore racy with the use for dumping (normally we inhibit
   //       the race with the thread_suspend_count_lock_).
@@ -2310,6 +2329,7 @@
     // thread's stack in debug builds where we'll hit the not suspended check in the stack walk.
     safe_to_dump = (safe_to_dump || dump_for_abort);
   }
+  DumpOrder dump_order = DumpOrder::kDefault;
   if (safe_to_dump || force_dump_stack) {
     // If we're currently in native code, dump that stack before dumping the managed stack.
     if (dump_native_stack && (dump_for_abort || force_dump_stack || ShouldShowNativeStack(this))) {
@@ -2319,12 +2339,13 @@
                            /*abort_on_error=*/ !(dump_for_abort || force_dump_stack));
       DumpNativeStack(os, unwinder, GetTid(), "  native: ", method);
     }
-    DumpJavaStack(os,
-                  /*check_suspended=*/ !force_dump_stack,
-                  /*dump_locks=*/ !force_dump_stack);
+    dump_order = DumpJavaStack(os,
+                               /*check_suspended=*/ !force_dump_stack,
+                               /*dump_locks=*/ !force_dump_stack);
   } else {
     os << "Not able to dump stack of thread that isn't suspended";
   }
+  return dump_order;
 }
 
 void Thread::ThreadExitCallback(void* arg) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 18a00b8..dda4c08 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -270,20 +270,28 @@
   // Dumps a one-line summary of thread state (used for operator<<).
   void ShortDump(std::ostream& os) const;
 
+  // Order of threads for ANRs (ANRs can be trimmed, so we print important ones first).
+  enum class DumpOrder : uint8_t {
+    kMain,     // Always print the main thread first (there might not be one).
+    kBlocked,  // Then print all threads that are blocked waiting on a lock.
+    kLocked,   // Then print all threads that are already holding some lock.
+    kDefault,  // Print all other threads, which might not be interesting for the ANR.
+  };
+
   // Dumps the detailed thread state and the thread stack (used for SIGQUIT).
-  void Dump(std::ostream& os,
-            bool dump_native_stack = true,
-            bool force_dump_stack = false) const
+  DumpOrder Dump(std::ostream& os,
+                 bool dump_native_stack = true,
+                 bool force_dump_stack = false) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void Dump(std::ostream& os,
-            unwindstack::AndroidLocalUnwinder& unwinder,
-            bool dump_native_stack = true,
-            bool force_dump_stack = false) const
+  DumpOrder Dump(std::ostream& os,
+                 unwindstack::AndroidLocalUnwinder& unwinder,
+                 bool dump_native_stack = true,
+                 bool force_dump_stack = false) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpJavaStack(std::ostream& os,
-                     bool check_suspended = true,
-                     bool dump_locks = true) const
+  DumpOrder DumpJavaStack(std::ostream& os,
+                          bool check_suspended = true,
+                          bool dump_locks = true) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Dumps the SIGQUIT per-thread header. 'thread' can be null for a non-attached thread, in which
@@ -1524,14 +1532,14 @@
   void VerifyStackImpl() REQUIRES_SHARED(Locks::mutator_lock_);
 
   void DumpState(std::ostream& os) const REQUIRES_SHARED(Locks::mutator_lock_);
-  void DumpStack(std::ostream& os,
-                 bool dump_native_stack = true,
-                 bool force_dump_stack = false) const
+  DumpOrder DumpStack(std::ostream& os,
+                      bool dump_native_stack = true,
+                      bool force_dump_stack = false) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void DumpStack(std::ostream& os,
-                 unwindstack::AndroidLocalUnwinder& unwinder,
-                 bool dump_native_stack = true,
-                 bool force_dump_stack = false) const
+  DumpOrder DumpStack(std::ostream& os,
+                      unwindstack::AndroidLocalUnwinder& unwinder,
+                      bool dump_native_stack = true,
+                      bool force_dump_stack = false) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Out-of-line conveniences for debugging in gdb.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index f57ce76..f36e922 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -20,7 +20,9 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <map>
 #include <sstream>
+#include <tuple>
 #include <vector>
 
 #include "android-base/stringprintf.h"
@@ -189,8 +191,9 @@
 // A closure used by Thread::Dump.
 class DumpCheckpoint final : public Closure {
  public:
-  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
-      : os_(os),
+  explicit DumpCheckpoint(bool dump_native_stack)
+      : lock_("Dump checkpoint lock", kGenericBottomLock),
+        os_(),
         // Avoid verifying count in case a thread doesn't end up passing through the barrier.
         // This avoids a SIGABRT that would otherwise happen in the destructor.
         barrier_(0, /*verify_count_on_shutdown=*/false),
@@ -204,18 +207,28 @@
     Thread* self = Thread::Current();
     CHECK(self != nullptr);
     std::ostringstream local_os;
+    Thread::DumpOrder dump_order;
     {
       ScopedObjectAccess soa(self);
-      thread->Dump(local_os, unwinder_, dump_native_stack_);
+      dump_order = thread->Dump(local_os, unwinder_, dump_native_stack_);
     }
     {
-      // Use the logging lock to ensure serialization when writing to the common ostream.
-      MutexLock mu(self, *Locks::logging_lock_);
-      *os_ << local_os.str() << std::endl;
+      MutexLock mu(self, lock_);
+      // Sort so that the most interesting threads for an ANR are printed first (ANRs can be trimmed).
+      std::pair<Thread::DumpOrder, uint32_t> sort_key(dump_order, thread->GetThreadId());
+      os_.emplace(sort_key, std::move(local_os));
     }
     barrier_.Pass(self);
   }
 
+  // Called at the end to print all the dumps in prioritized order.
+  void Dump(Thread* self, std::ostream& os) {
+    MutexLock mu(self, lock_);
+    for (const auto& it : os_) {
+      os << it.second.str() << std::endl;
+    }
+  }
+
   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
     Thread* self = Thread::Current();
     ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
@@ -228,8 +241,10 @@
   }
 
  private:
-  // The common stream that will accumulate all the dumps.
-  std::ostream* const os_;
+  // Storage for the per-thread dumps (guarded by lock since they are generated in parallel).
+  // Map is used to obtain sorted order. The key is unique, but use multimap just in case.
+  Mutex lock_;
+  std::multimap<std::pair<Thread::DumpOrder, uint32_t>, std::ostringstream> os_ GUARDED_BY(lock_);
   // The barrier to be passed through and for the requestor to wait upon.
   Barrier barrier_;
   // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
@@ -245,7 +260,7 @@
     os << "DALVIK THREADS (" << list_.size() << "):\n";
   }
   if (self != nullptr) {
-    DumpCheckpoint checkpoint(&os, dump_native_stack);
+    DumpCheckpoint checkpoint(dump_native_stack);
     size_t threads_running_checkpoint;
     {
       // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
@@ -255,6 +270,7 @@
     if (threads_running_checkpoint != 0) {
       checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
     }
+    checkpoint.Dump(self, os);
   } else {
     DumpUnattachedThreads(os, dump_native_stack);
   }
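
For reference, here is a self-contained sketch of the buffer-then-flush pattern the new
DumpCheckpoint uses: each checkpoint formats its thread's dump into a private
std::ostringstream, files it under a (DumpOrder, tid) key while holding a mutex, and a
single final pass emits everything in key order, so the output no longer depends on
which thread reached the logging lock first. The DumpCollector class and the thread
simulation are illustrative only; the real code uses ART's Mutex, Barrier, and
checkpoint machinery rather than std::mutex and std::thread.

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <thread>
#include <utility>
#include <vector>

enum class DumpOrder : uint8_t { kMain, kBlocked, kLocked, kDefault };

class DumpCollector {
 public:
  // Store one thread's formatted dump under its (priority, tid) sort key.
  void Add(DumpOrder order, uint32_t tid, std::ostringstream&& dump) {
    std::lock_guard<std::mutex> guard(lock_);
    dumps_.emplace(std::make_pair(order, tid), std::move(dump));
  }

  // Emit all dumps in key order: kMain, then kBlocked, then kLocked, then
  // kDefault, with ties broken deterministically by thread id.
  void Flush(std::ostream& os) {
    std::lock_guard<std::mutex> guard(lock_);
    for (const auto& entry : dumps_) {
      os << entry.second.str() << std::endl;
    }
  }

 private:
  std::mutex lock_;
  std::multimap<std::pair<DumpOrder, uint32_t>, std::ostringstream> dumps_;
};

int main() {
  DumpCollector collector;
  std::vector<std::thread> workers;
  // Simulate checkpoints running on several threads in arbitrary order.
  for (uint32_t tid = 2; tid <= 5; ++tid) {
    workers.emplace_back([&collector, tid] {
      std::ostringstream local;
      local << "\"thread-" << tid << "\" tid=" << tid << " ...";
      DumpOrder order = (tid % 2 == 0) ? DumpOrder::kBlocked : DumpOrder::kDefault;
      collector.Add(order, tid, std::move(local));
    });
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
  collector.Flush(std::cout);  // Blocked threads print before default ones.
  return 0;
}
```
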