Improve suspension timeout diagnostic and fix race

Fix a data race on state_and_flags. Since the access was volatile
and there are system calls in the loop, this is extremely unlikey
to have casused the bug here, but ...

So, assuming this is still broken, produce more informative
output once we time out.

Remove unused argument from SuspendThreadByPeer(). It made the
logic more complicated and made it harder to reason about
correctness.

Remove dead code after LOG(FATAL, ...)

Bug: 181778559
Test: TreeHugger, temporarily paste log message into hotter path.
Change-Id: I6f3455925b3a3f4726a870150aeb54ea60a38d67
diff --git a/openjdkjvm/OpenjdkJvm.cc b/openjdkjvm/OpenjdkJvm.cc
index 18078ab..90c944c 100644
--- a/openjdkjvm/OpenjdkJvm.cc
+++ b/openjdkjvm/OpenjdkJvm.cc
@@ -425,7 +425,6 @@
   art::Thread* thread;
   {
     thread = thread_list->SuspendThreadByPeer(jthread,
-                                              true,
                                               art::SuspendReason::kInternal,
                                               &timed_out);
   }
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index bb8fa3b..a9a6ee8 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -898,7 +898,6 @@
     bool timeout = true;
     art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
         target_jthread,
-        /* request_suspension= */ true,
         art::SuspendReason::kForUserCode,
         &timeout);
     if (ret_target == nullptr && !timeout) {
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 9d2dfac..e88516e 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -59,7 +59,6 @@
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     bool timed_out;
     Thread* thread = thread_list->SuspendThreadByPeer(peer,
-                                                      /* request_suspension= */ true,
                                                       SuspendReason::kInternal,
                                                       &timed_out);
     if (thread != nullptr) {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 37b3fe6..c3b4fe0 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -148,7 +148,6 @@
   bool timed_out;
   // Take suspend thread lock to avoid races with threads trying to suspend this one.
   Thread* thread = thread_list->SuspendThreadByPeer(peer,
-                                                    /* request_suspension= */ true,
                                                     SuspendReason::kInternal,
                                                     &timed_out);
   if (thread != nullptr) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 46aa2b5..16a5f93 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4468,6 +4468,12 @@
       WellKnownClasses::java_lang_Thread_systemDaemon)->GetBoolean(GetPeer());
 }
 
+std::string Thread::StateAndFlagsAsHexString() const {
+  std::stringstream result_stream;
+  result_stream << std::hex << tls32_.state_and_flags.as_atomic_int.load();
+  return result_stream.str();
+}
+
 ScopedExceptionStorage::ScopedExceptionStorage(art::Thread* self)
     : self_(self), hs_(self_), excp_(hs_.NewHandle<art::mirror::Throwable>(self_->GetException())) {
   self_->ClearException();
diff --git a/runtime/thread.h b/runtime/thread.h
index cd1227a..bf8b00a 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -254,7 +254,7 @@
 
   bool IsSuspended() const {
     union StateAndFlags state_and_flags;
-    state_and_flags.as_int = tls32_.state_and_flags.as_int;
+    state_and_flags.as_int = tls32_.state_and_flags.as_atomic_int.load(std::memory_order_relaxed);
     return state_and_flags.as_struct.state != kRunnable &&
         (state_and_flags.as_struct.flags & kSuspendRequest) != 0;
   }
@@ -1517,6 +1517,9 @@
   };
   static_assert(sizeof(StateAndFlags) == sizeof(int32_t), "Weird state_and_flags size");
 
+  // Format state and flags as a hex string. For diagnostic output.
+  std::string StateAndFlagsAsHexString() const;
+
   static void ThreadExitCallback(void* arg);
 
   // Maximum number of suspend barriers.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index e0d62d0..efc354c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -874,10 +874,11 @@
 }
 
 Thread* ThreadList::SuspendThreadByPeer(jobject peer,
-                                        bool request_suspension,
                                         SuspendReason reason,
                                         bool* timed_out) {
+  bool request_suspension = true;
   const uint64_t start_time = NanoTime();
+  int self_suspend_count = 0;
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
   *timed_out = false;
   Thread* const self = Thread::Current();
@@ -926,6 +927,7 @@
             // We hold the suspend count lock but another thread is trying to suspend us. Its not
             // safe to try to suspend another thread in case we get a cycle. Start the loop again
             // which will allow this thread to be suspended.
+            ++self_suspend_count;
             continue;
           }
           CHECK(suspended_thread == nullptr);
@@ -957,20 +959,22 @@
         }
         const uint64_t total_delay = NanoTime() - start_time;
         if (total_delay >= thread_suspend_timeout_ns_) {
-          ThreadSuspendByPeerWarning(self,
-                                     ::android::base::FATAL,
-                                     "Thread suspension timed out",
-                                     peer);
-          if (suspended_thread != nullptr) {
+          if (suspended_thread == nullptr) {
+            ThreadSuspendByPeerWarning(self,
+                                       ::android::base::FATAL,
+                                       "Failed to issue suspend request",
+                                       peer);
+          } else {
             CHECK_EQ(suspended_thread, thread);
-            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
-                                                                -1,
-                                                                nullptr,
-                                                                reason);
-            DCHECK(updated);
+            LOG(WARNING) << "Suspended thread state_and_flags: "
+                         << suspended_thread->StateAndFlagsAsHexString()
+                         << ", self_suspend_count = " << self_suspend_count;
+            ThreadSuspendByPeerWarning(self,
+                                       ::android::base::FATAL,
+                                       "Thread suspension timed out",
+                                       peer);
           }
-          *timed_out = true;
-          return nullptr;
+          UNREACHABLE();
         } else if (sleep_us == 0 &&
             total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
           // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 87a4c8d..f5b58a0 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -81,11 +81,8 @@
 
   // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
   // else null. The peer is used to identify the thread to avoid races with the thread terminating.
-  // If the thread should be suspended then value of request_suspension should be true otherwise
-  // the routine will wait for a previous suspend request. If the suspension times out then *timeout
-  // is set to true.
+  // If the suspension times out then *timeout is set to true.
   Thread* SuspendThreadByPeer(jobject peer,
-                              bool request_suspension,
                               SuspendReason reason,
                               bool* timed_out)
       REQUIRES(!Locks::mutator_lock_,
diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
index a185446..a10fe2e 100644
--- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
+++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
@@ -81,7 +81,7 @@
   }
   bool timed_out = false;
   Thread* other = Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
-      target, true, SuspendReason::kInternal, &timed_out);
+      target, SuspendReason::kInternal, &timed_out);
   CHECK(!timed_out);
   CHECK(other != nullptr);
   ScopedSuspendAll ssa(__FUNCTION__);