author     2022-10-13 02:47:24 +0000
committer  2022-10-13 02:47:24 +0000
commit     ebd76406bf5fa74185998bc29f0f27c20fa2e683 (patch)
tree       beec1accc089bbbc432a140dfebe28bb7278a909
parent     fd20a745227aa7cae7a08728bb29e5bfce64ea87 (diff)
Revert "Revert^2 "Thread suspension cleanup and deadlock fix""
This reverts commit fd20a745227aa7cae7a08728bb29e5bfce64ea87.
Reason for revert: Lots of libartd failures due to new checkpoint lock level check.
Change-Id: I0cf88ff893f8743a9a830a49489807d0921199a3
 compiler/utils/assembler_thumb_test_expected.cc.inc                 |   2
 openjdkjvmti/ti_thread.cc                                           |  14
 runtime/base/locks.h                                                |  11
 runtime/base/mutex-inl.h                                            |  74
 runtime/base/mutex.cc                                               |  13
 runtime/base/mutex.h                                                |  14
 runtime/cha.cc                                                      |   2
 runtime/debugger.cc                                                 |   5
 runtime/entrypoints_order_test.cc                                   |  12
 runtime/mirror/object.cc                                            |  14
 runtime/mirror/object.h                                             |  13
 runtime/monitor.cc                                                  |   7
 runtime/mutator_gc_coord.md                                         |  71
 runtime/native/dalvik_system_VMStack.cc                             |   8
 runtime/native/java_lang_Thread.cc                                  |   8
 runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc      |  10
 runtime/oat.h                                                       |   4
 runtime/thread-inl.h                                                | 121
 runtime/thread.cc                                                   | 344
 runtime/thread.h                                                    | 218
 runtime/thread_list.cc                                              | 559
 runtime/thread_list.h                                               |  47
 test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc |   4
 23 files changed, 765 insertions(+), 810 deletions(-)
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index dac21ae4dc..ae84338711 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -155,7 +155,7 @@ const char* const VixlJniHelpersResults = {
   "  224: d9 f8 24 80   ldr.w r8, [r9, #36]\n"
   "  228: 70 47         bx lr\n"
   "  22a: d9 f8 9c 00   ldr.w r0, [r9, #156]\n"
-  "  22e: d9 f8 d0 e2   ldr.w lr, [r9, #720]\n"
+  "  22e: d9 f8 d4 e2   ldr.w lr, [r9, #724]\n"
   "  232: f0 47         blx lr\n"
 };
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 7d39fdf452..7fb789d558 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -900,14 +900,16 @@ jvmtiError ThreadUtil::SuspendOther(art::Thread* self,
       }
     }
   }
+  bool timeout = true;
   art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
       target_jthread,
-      art::SuspendReason::kForUserCode);
-  if (ret_target == nullptr) {
+      art::SuspendReason::kForUserCode,
+      &timeout);
+  if (ret_target == nullptr && !timeout) {
     // TODO It would be good to get more information about why exactly the thread failed to
     // suspend.
     return ERR(INTERNAL);
-  } else {
+  } else if (!timeout) {
+    // we didn't time out and got a result.
     return OK;
   }
@@ -925,7 +927,11 @@ jvmtiError ThreadUtil::SuspendSelf(art::Thread* self) {
     // This can only happen if we race with another thread to suspend 'self' and we lose.
     return ERR(THREAD_SUSPENDED);
   }
-  self->IncrementSuspendCount(self, nullptr, nullptr, art::SuspendReason::kForUserCode);
+  // We shouldn't be able to fail this.
+  if (!self->ModifySuspendCount(self, +1, nullptr, art::SuspendReason::kForUserCode)) {
+    // TODO More specific error would be nice.
+    return ERR(INTERNAL);
+  }
 }
 // Once we have requested the suspend we actually go to sleep. We need to do this after releasing
 // the suspend_lock to make sure we can be woken up. This call gains the mutator lock causing us
diff --git a/runtime/base/locks.h b/runtime/base/locks.h
index e8c83fe2b2..c15e5dee71 100644
--- a/runtime/base/locks.h
+++ b/runtime/base/locks.h
@@ -108,6 +108,10 @@ enum LockLevel : uint8_t {
   kClassLinkerClassesLock,  // TODO rename.
   kSubtypeCheckLock,
   kBreakpointLock,
+  // This is a generic lock level for a lock meant to be gained after having a
+  // monitor lock.
+  kPostMonitorLock,
+  kMonitorLock,
   kMonitorListLock,
   kThreadListLock,
   kAllocTrackerLock,
@@ -121,10 +125,7 @@ enum LockLevel : uint8_t {
   kRuntimeShutdownLock,
   kTraceLock,
   kHeapBitmapLock,
-  // This is a generic lock level for a lock meant to be gained after having a
-  // monitor lock.
-  kPostMonitorLock,
-  kMonitorLock,
+
   // This is a generic lock level for a top-level lock meant to be gained after having the
   // mutator_lock_.
   kPostMutatorTopLockLevel,
@@ -137,7 +138,7 @@ enum LockLevel : uint8_t {
   kUserCodeSuspensionLock,
   kZygoteCreationLock,

-  // The highest valid lock level. Use this for locks that should only be acquired with no
+  // The highest valid lock level. Use this if there is code that should only be called with no
   // other locks held. Since this is the highest lock level we also allow it to be held even if the
   // runtime or current thread is not fully set-up yet (for example during thread attach). Note that
   // this lock also has special behavior around the mutator_lock_. Since the mutator_lock_ is not
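The locks.h hunk above restores `kPostMonitorLock` and `kMonitorLock` to their lower positions in the `LockLevel` ordering; the reverted change had raised them so that the new checkpoint lock-level check (the one the commit message blames for the libartd failures) could catch checkpoints acquiring monitor-level locks. For readers unfamiliar with the mechanism, here is a deliberately simplified model of how such a per-thread level check works — `CheckedThread` and its members are hypothetical, not ART's actual types:

```cpp
#include <array>
#include <cassert>
#include <cstdint>

// Simplified model: a thread records the mutex it holds at each level, and
// acquiring a new mutex is only legal if no mutex at the same or a higher
// level is already held. ART's BaseMutex::RegisterAsLocked works similarly.
enum LockLevel : uint8_t { kAbortLock, kMonitorLock, kThreadListLock, kMutatorLock, kLockLevelCount };

struct CheckedThread {
  std::array<const void*, kLockLevelCount> held{};  // one slot per level

  void RegisterAsLocked(const void* mutex, LockLevel level) {
    // Holding any mutex at `level` or above now would be a lock-order
    // inversion, i.e. a potential deadlock later.
    for (int i = level; i < kLockLevelCount; ++i) {
      assert(held[i] == nullptr && "lock order violation");
    }
    held[level] = mutex;
  }

  void RegisterAsUnlocked(LockLevel level) { held[level] = nullptr; }
};
```

Under this model, moving `kMonitorLock` higher in the enum makes more acquisitions illegal while a monitor is held, which is why the stricter ordering tripped debug (libartd) builds.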
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index a8dc95a86b..dba1e1299b 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -60,43 +60,43 @@ static inline void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALY
   // The check below enumerates the cases where we expect not to be able to check the validity of
   // locks on a thread. Lock checking is disabled to avoid deadlock when checking shutdown lock.
   // TODO: tighten this check.
-  CHECK(!Locks::IsSafeToCallAbortRacy() ||
-        // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
-        // yet established.
-        level == kRuntimeShutdownLock ||
-        // Thread Ids are allocated/released before threads are established.
-        level == kAllocatedThreadIdsLock ||
-        // Thread LDT's are initialized without Thread::Current established.
-        level == kModifyLdtLock ||
-        // Threads are unregistered while holding the thread list lock, during this process they
-        // no longer exist and so we expect an unlock with no self.
-        level == kThreadListLock ||
-        // Ignore logging which may or may not have set up thread data structures.
-        level == kLoggingLock ||
-        // When transitioning from suspended to runnable, a daemon thread might be in
-        // a situation where the runtime is shutting down. To not crash our debug locking
-        // mechanism we just pass null Thread* to the MutexLock during that transition
-        // (see Thread::TransitionFromSuspendedToRunnable).
-        level == kThreadSuspendCountLock ||
-        // Avoid recursive death.
-        level == kAbortLock ||
-        // Locks at the absolute top of the stack can be locked at any time.
-        level == kTopLockLevel ||
-        // The unexpected signal handler may be catching signals from any thread.
-        level == kUnexpectedSignalLock) << level;
+  if (kDebugLocking) {
+    CHECK(!Locks::IsSafeToCallAbortRacy() ||
+          // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
+          // yet established.
+          level == kRuntimeShutdownLock ||
+          // Thread Ids are allocated/released before threads are established.
+          level == kAllocatedThreadIdsLock ||
+          // Thread LDT's are initialized without Thread::Current established.
+          level == kModifyLdtLock ||
+          // Threads are unregistered while holding the thread list lock, during this process they
+          // no longer exist and so we expect an unlock with no self.
+          level == kThreadListLock ||
+          // Ignore logging which may or may not have set up thread data structures.
+          level == kLoggingLock ||
+          // When transitioning from suspended to runnable, a daemon thread might be in
+          // a situation where the runtime is shutting down. To not crash our debug locking
+          // mechanism we just pass null Thread* to the MutexLock during that transition
+          // (see Thread::TransitionFromSuspendedToRunnable).
+          level == kThreadSuspendCountLock ||
+          // Avoid recursive death.
+          level == kAbortLock ||
+          // Locks at the absolute top of the stack can be locked at any time.
+          level == kTopLockLevel ||
+          // The unexpected signal handler may be catching signals from any thread.
+          level == kUnexpectedSignalLock) << level;
+  }
 }

-inline void BaseMutex::RegisterAsLocked(Thread* self, bool check) {
+inline void BaseMutex::RegisterAsLocked(Thread* self) {
   if (UNLIKELY(self == nullptr)) {
-    if (check) {
-      CheckUnattachedThread(level_);
-    }
-  } else {
-    RegisterAsLockedImpl(self, level_, check);
+    CheckUnattachedThread(level_);
+    return;
   }
+  RegisterAsLockedImpl(self, level_);
 }

-inline void BaseMutex::RegisterAsLockedImpl(Thread* self, LockLevel level, bool check) {
+inline void BaseMutex::RegisterAsLockedImpl(Thread* self, LockLevel level) {
   DCHECK(self != nullptr);
   DCHECK_EQ(level_, level);
   // It would be nice to avoid this condition checking in the non-debug case,
@@ -107,7 +107,7 @@ inline void BaseMutex::RegisterAsLockedImpl(Thread* self, LockLevel level, bool
   if (UNLIKELY(level == kThreadWaitLock) && self->GetHeldMutex(kThreadWaitLock) != nullptr) {
     level = kThreadWaitWakeLock;
   }
-  if (check) {
+  if (kDebugLocking) {
     // Check if a bad Mutex of this level or lower is held.
     bool bad_mutexes_held = false;
     // Specifically allow a kTopLockLevel lock to be gained when the current thread holds the
@@ -161,12 +161,10 @@ inline void BaseMutex::RegisterAsLockedImpl(Thread* self, LockLevel level, bool

 inline void BaseMutex::RegisterAsUnlocked(Thread* self) {
   if (UNLIKELY(self == nullptr)) {
-    if (kDebugLocking) {
-      CheckUnattachedThread(level_);
-    }
-  } else {
-    RegisterAsUnlockedImpl(self , level_);
+    CheckUnattachedThread(level_);
+    return;
   }
+  RegisterAsUnlockedImpl(self , level_);
 }

 inline void BaseMutex::RegisterAsUnlockedImpl(Thread* self, LockLevel level) {
@@ -308,7 +306,7 @@ inline void MutatorMutex::TransitionFromRunnableToSuspended(Thread* self) {
 }

 inline void MutatorMutex::TransitionFromSuspendedToRunnable(Thread* self) {
-  RegisterAsLockedImpl(self, kMutatorLock, kDebugLocking);
+  RegisterAsLockedImpl(self, kMutatorLock);
   AssertSharedHeld(self);
 }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 2d6f178487..01d7e73774 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -245,12 +245,11 @@ void BaseMutex::DumpAll(std::ostream& os) {
 }

 void BaseMutex::CheckSafeToWait(Thread* self) {
-  if (!kDebugLocking) {
-    return;
-  }
   if (self == nullptr) {
     CheckUnattachedThread(level_);
-  } else {
+    return;
+  }
+  if (kDebugLocking) {
     CHECK(self->GetHeldMutex(level_) == this || level_ == kMonitorLock)
         << "Waiting on unacquired mutex: " << name_;
     bool bad_mutexes_held = false;
@@ -566,7 +565,6 @@ bool Mutex::IsDumpFrequent(Thread* thread, uint64_t try_times) {
   }
 }

-template<bool check>
 bool Mutex::ExclusiveTryLock(Thread* self) {
   DCHECK(self == nullptr || self == Thread::Current());
   if (kDebugLocking && !recursive_) {
@@ -597,7 +595,7 @@ bool Mutex::ExclusiveTryLock(Thread* self) {
 #endif
     DCHECK_EQ(GetExclusiveOwnerTid(), 0);
     exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
-    RegisterAsLocked(self, check);
+    RegisterAsLocked(self);
   }
   recursion_count_++;
   if (kDebugLocking) {
@@ -608,9 +606,6 @@ bool Mutex::ExclusiveTryLock(Thread* self) {
   return true;
 }

-template bool Mutex::ExclusiveTryLock<false>(Thread* self);
-template bool Mutex::ExclusiveTryLock<true>(Thread* self);
-
 bool Mutex::ExclusiveTryLockWithSpinning(Thread* self) {
   // Spin a small number of times, since this affects our ability to respond to suspension
   // requests. We spin repeatedly only if the mutex repeatedly becomes available and unavailable
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 407effeab6..87e9525557 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -103,11 +103,10 @@ class BaseMutex {
   BaseMutex(const char* name, LockLevel level);
   virtual ~BaseMutex();

-  // Add this mutex to those owned by self, and optionally perform lock order checking. Caller
-  // may wish to disable checking for trylock calls that cannot result in deadlock. For this call
-  // only, self may also be another suspended thread.
-  void RegisterAsLocked(Thread* self, bool check = kDebugLocking);
-  void RegisterAsLockedImpl(Thread* self, LockLevel level, bool check);
+  // Add this mutex to those owned by self, and perform appropriate checking.
+  // For this call only, self may also be another suspended thread.
+  void RegisterAsLocked(Thread* self);
+  void RegisterAsLockedImpl(Thread* self, LockLevel level);

   void RegisterAsUnlocked(Thread* self);
   void RegisterAsUnlockedImpl(Thread* self, LockLevel level);
@@ -184,10 +183,7 @@ class LOCKABLE Mutex : public BaseMutex {
   void ExclusiveLock(Thread* self) ACQUIRE();
   void Lock(Thread* self) ACQUIRE() { ExclusiveLock(self); }

-  // Returns true if acquires exclusive access, false otherwise. The `check` argument specifies
-  // whether lock level checking should be performed. Should be defaulted unless we are using
-  // TryLock instead of Lock for deadlock avoidance.
-  template<bool check = kDebugLocking>
+  // Returns true if acquires exclusive access, false otherwise.
   bool ExclusiveTryLock(Thread* self) TRY_ACQUIRE(true);
   bool TryLock(Thread* self) TRY_ACQUIRE(true) { return ExclusiveTryLock(self); }
   // Equivalent to ExclusiveTryLock, but retry for a short period before giving up.
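The `check` template parameter removed above existed because a try-lock used for deadlock avoidance can safely be taken out of lock-level order: a failed try-lock never blocks, so no wait-for cycle can form. For context, here is the general pattern the removed comment alludes to, sketched with plain `std::mutex` rather than ART's `Mutex`:

```cpp
#include <mutex>
#include <thread>

// Try-lock back-off: when code must take `second` while already holding
// `first` against the declared lock order, it try-locks and backs off
// instead of blocking, so a deadlock cycle cannot form.
void LockBothSafely(std::mutex& first, std::mutex& second) {
  while (true) {
    first.lock();
    if (second.try_lock()) {
      return;  // Both held; the caller unlocks in reverse order.
    }
    // Could not get `second`: release `first` so its holder can make
    // progress, then yield and retry from scratch.
    first.unlock();
    std::this_thread::yield();
  }
}
```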
diff --git a/runtime/cha.cc b/runtime/cha.cc
index 0c032aa125..d19d4f6e30 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -241,7 +241,7 @@ class CHACheckpoint final : public Closure {
     // Note thread and self may not be equal if thread was already suspended at
     // the point of the request.
     Thread* self = Thread::Current();
-    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+    ScopedObjectAccess soa(self);
     CHAStackVisitor visitor(thread, nullptr, method_headers_);
     visitor.WalkStack();
     barrier_.Pass(self);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 969a229660..af365310ef 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -349,10 +349,7 @@ void Dbg::DdmSetThreadNotification(bool enable) {
       Dbg::DdmSendThreadNotification(thread, CHUNK_TYPE("THCR"));
       finish_barrier.Pass(cls_self);
     });
-    // TODO: The above eventually results in calls to EventHandler::DispatchEvent, which does a
-    // ScopedThreadStateChange, which amounts to a thread state change inside the checkpoint run
-    // method. Hence the normal check would fail, and thus we specify Unchecked here.
-    size_t checkpoints = Runtime::Current()->GetThreadList()->RunCheckpointUnchecked(&fc);
+    size_t checkpoints = Runtime::Current()->GetThreadList()->RunCheckpoint(&fc);
     ScopedThreadSuspension sts(self, ThreadState::kWaitingForCheckPointsToRun);
     finish_barrier.Increment(self, checkpoints);
   }
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index bccfa52f92..e969652c7a 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -111,15 +111,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspendall_barrier,
-                        sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspendall_barrier, active_suspend1_barriers,
-                        sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend1_barriers, thread_local_pos,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
                         sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, thread_local_start,
+                        sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_limit, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_limit, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_limit, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, jni_entrypoints, sizeof(size_t));
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 2aa0f10023..bb9e85dd0e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -181,8 +181,7 @@ void Object::SetHashCodeSeed(uint32_t new_seed) {
   hash_code_seed.store(new_seed, std::memory_order_relaxed);
 }

-template<bool kAllowInflation>
-int32_t Object::IdentityHashCodeHelper() {
+int32_t Object::IdentityHashCode() {
   ObjPtr<Object> current_this = this;  // The this pointer may get invalidated by thread suspension.
   while (true) {
     LockWord lw = current_this->GetLockWord(false);
@@ -200,9 +199,6 @@ int32_t Object::IdentityHashCode() {
         break;
       }
       case LockWord::kThinLocked: {
-        if (!kAllowInflation) {
-          return 0;
-        }
         // Inflate the thin lock to a monitor and stick the hash code inside of the monitor. May
         // fail spuriously.
         Thread* self = Thread::Current();
@@ -230,14 +226,6 @@ int32_t Object::IdentityHashCode() {
   }
 }

-int32_t Object::IdentityHashCode() {
-  return IdentityHashCodeHelper</* kAllowInflation= */ true>();
-}
-
-int32_t Object::IdentityHashCodeNoInflation() {
-  return IdentityHashCodeHelper</* kAllowInflation= */ false>();
-}
-
 void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, ObjPtr<Object> new_value) {
   ObjPtr<Class> c = GetClass();
   Runtime* runtime = Runtime::Current();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index a491a6a3e3..0ba545becc 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -137,18 +137,11 @@ class MANAGED LOCKABLE Object {
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);

-  // Returns a nonzero value that fits into lockword slot.
   int32_t IdentityHashCode()
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);

-  // Identical to the above, but returns 0 if monitor inflation would otherwise be needed.
-  int32_t IdentityHashCodeNoInflation()
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_,
-               !Locks::thread_suspend_count_lock_);
-
   static constexpr MemberOffset MonitorOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
@@ -726,12 +719,6 @@ class MANAGED LOCKABLE Object {
       REQUIRES_SHARED(Locks::mutator_lock_);

  private:
-  template<bool kAllowInflation>
-  int32_t IdentityHashCodeHelper()
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_,
-               !Locks::thread_suspend_count_lock_);
-
   // Get a field with acquire semantics.
   template<typename kSize>
   ALWAYS_INLINE kSize GetFieldAcquire(MemberOffset field_offset)
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 6609b75fbb..4e64c95b8c 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1005,7 +1005,7 @@ bool Monitor::Deflate(Thread* self, ObjPtr<mirror::Object> obj) {
     if (monitor->num_waiters_.load(std::memory_order_relaxed) > 0) {
       return false;
     }
-    if (!monitor->monitor_lock_.ExclusiveTryLock</* check= */ false>(self)) {
+    if (!monitor->monitor_lock_.ExclusiveTryLock(self)) {
       // We cannot deflate a monitor that's currently held. It's unclear whether we should if
       // we could.
       return false;
@@ -1065,10 +1065,13 @@ void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWo
       ThreadList* thread_list = Runtime::Current()->GetThreadList();
       // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
       self->SetMonitorEnterObject(obj.Get());
+      bool timed_out;
       Thread* owner;
       {
         ScopedThreadSuspension sts(self, ThreadState::kWaitingForLockInflation);
-        owner = thread_list->SuspendThreadByThreadId(owner_thread_id, SuspendReason::kInternal);
+        owner = thread_list->SuspendThreadByThreadId(owner_thread_id,
+                                                     SuspendReason::kInternal,
+                                                     &timed_out);
       }
       if (owner != nullptr) {
         // We succeeded in suspending the thread, check the lock's status didn't change.
diff --git a/runtime/mutator_gc_coord.md b/runtime/mutator_gc_coord.md
index e724de4a76..b3635c558a 100644
--- a/runtime/mutator_gc_coord.md
+++ b/runtime/mutator_gc_coord.md
@@ -217,77 +217,6 @@ processing, after acquiring `reference_processor_lock_`. This means that empty
 checkpoints do not preclude client threads from being in the middle of an
 operation that involves a weak reference access, while nonempty checkpoints do.

-Thread Suspension Mechanics
----------------------------
-
-Thread suspension is initiated by a registered thread, except that, for testing
-purposes, `SuspendAll` may be invoked with `self == nullptr`. We never suspend
-the initiating thread, explicitly excluding it from `SuspendAll()`, and failing
-`SuspendThreadBy...()` requests to that effect.
-
-The suspend calls invoke `IncrementSuspendCount()` to increment the thread
-suspend count for each thread. That adds a "suspend barrier" (atomic counter) to
-the per-thread list of such counters to decrement. It normally sets the
-`kSuspendRequest` ("should enter safepoint handler") and `kActiveSuspendBarrier`
-("need to notify us when suspended") flags.
-
-After setting these two flags, we check whether the thread is suspended and
-`kSuspendRequest` is still set. Since the thread is already suspended, it cannot
-be expected to respond to "pass the suspend barrier" (decrement the atomic
-counter) in a timely fashion. Hence we do so on its behalf. This decrements
-the "barrier" and removes it from the thread's list of barriers to decrement,
-and clears `kActiveSuspendBarrier`. `kSuspendRequest` remains to ensure the
-thread doesn't prematurely return to runnable state.
-
-If `SuspendAllInternal()` does not immediately see a suspended state, then it is up
-to the target thread to decrement the suspend barrier.
-`TransitionFromRunnableToSuspended()` calls
-`TransitionToSuspendedAndRunCheckpoints()`, which changes the thread state, and
-then calls `CheckActiveSuspendBarriers()` to check for the
-`kActiveSuspendBarrier` flag and decrement the suspend barrier if set.
-
-The `suspend_count_lock_` is not consistently held in the target thread
-during this process. Thus correctness in resolving the race between a
-suspension-requesting thread and a target thread voluntarily suspending relies
-on the following: If the requesting thread sets the flags with
-`kActiveSuspendBarrier` before the target's state is changed to suspended, then
-the target thread will see `kActiveSuspendBarrier` set, and will attempt to
-handle the barrier. If, on the other hand, the target thread changes the thread
-state first, then the requesting thread will see the suspended state, and handle
-the barrier itself. Since the actual update of suspend counts and suspend
-barrier data structures is done under the `suspend_count_lock_`, we always
-ensure that either the requestor removes/clears the barrier for a given target,
-or the target thread(s) decrement the barrier, but not both. This also ensures
-that the barrier cannot be decremented after the stack frame holding the barrier
-goes away.
-
-This relies on the fact that the two stores in the two threads to the state and
-kActiveSuspendBarrier flag are ordered with respect to the later loads. That's
-guaranteed, since they are all stored in a single `atomic<>`. Thus even relaxed
-accesses are OK.
-
-The actual suspend barrier representation still varies between `SuspendAll()`
-and `SuspendThreadBy...()`. The former relies on the fact that only one such
-barrier can be in use at a time, while the latter maintains a linked list of
-active suspend barriers for each target thread, relying on the fact that each
-one can appear on the list of only one thread, and we can thus use list nodes
-allocated in the stack frames of requesting threads.
-
-**Avoiding suspension cycles**
-
-Any thread can issue a `SuspendThreadByPeer()` or `SuspendAll()` request. But if
-Thread A increments Thread B's suspend count while Thread B increments Thread
-A's suspend count, and they then both suspend during a subsequent thread
-transition, we're deadlocked.
-
-For `SuspendAll()`, we enforce a requirement that at most one `SuspendAll()`
-request is running at one time. In addition, in all cases, we refuse to initiate
-a suspend request from a registered thread that is also being asked to suspend
-(i.e. the suspend count is nonzero). Instead the requestor waits for that
-condition to change. This means that we cannot create a cycle in which each
-thread has asked to suspend the next one, and thus no thread can progress. The
-required atomicity of the requestor suspend count check with setting the suspend
-count of the target(s) is ensured by holding `suspend_count_lock_`.
-
 [^1]: Some comments in the code refer to a not-yet-really-implemented scheme in
 which the compiler-generated code would load through the address at
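The documentation removed above describes a requester-initialized atomic counter that each suspending target decrements exactly once. As a toy model of that handshake (illustrative C++ only, not ART's code — ART stores the flag in the thread's state-and-flags word and waits on a futex rather than spinning):

```cpp
#include <atomic>
#include <thread>
#include <vector>

// Toy analog of the kActiveSuspendBarrier handshake: the requester points
// each target at a stack-allocated counter; each target claims and
// decrements it at most once when it reaches a suspended state.
struct ToyThread {
  std::atomic<std::atomic<int>*> barrier{nullptr};

  // Runs on the target (or on the requester on the target's behalf, if the
  // target is already suspended) when the suspend request is noticed.
  void PassSuspendBarrier() {
    std::atomic<int>* b = barrier.exchange(nullptr);  // claim at most once
    if (b != nullptr) {
      b->fetch_sub(1, std::memory_order_release);
    }
  }
};

void SuspendAllToy(std::vector<ToyThread*>& targets) {
  std::atomic<int> pending{static_cast<int>(targets.size())};
  for (ToyThread* t : targets) {
    t->barrier.store(&pending, std::memory_order_release);
    // ... a real runtime also sets its kSuspendRequest equivalent here ...
  }
  while (pending.load(std::memory_order_acquire) != 0) {
    std::this_thread::yield();  // ART blocks on a futex instead.
  }
  // `pending` lives in this frame: the claim-once exchange above is why no
  // one may decrement the barrier after this function returns.
}
```

The claim-once `exchange` mirrors the document's invariant that either the requester clears the barrier for a given target or the target decrements it, but never both.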
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 7b21de204d..71078c9ad2 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -57,7 +57,10 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa,
     // Suspend thread to build stack trace.
     ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
-    Thread* thread = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal);
+    bool timed_out;
+    Thread* thread = thread_list->SuspendThreadByPeer(peer,
+                                                      SuspendReason::kInternal,
+                                                      &timed_out);
     if (thread != nullptr) {
       // Must be runnable to create returned array.
       {
@@ -67,6 +70,9 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa,
       // Restart suspended thread.
       bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
       DCHECK(resumed);
+    } else if (timed_out) {
+      LOG(ERROR) << "Trying to get thread's stack failed as the thread failed to suspend within a "
+                    "generous timeout.";
     }
   }
   return trace;
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index fd67a0a7fa..570c554782 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -147,8 +147,11 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
     // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
     // in the DDMS send code.
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
+    bool timed_out;
     // Take suspend thread lock to avoid races with threads trying to suspend this one.
-    Thread* thread = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal);
+    Thread* thread = thread_list->SuspendThreadByPeer(peer,
+                                                      SuspendReason::kInternal,
+                                                      &timed_out);
     if (thread != nullptr) {
       {
         ScopedObjectAccess soa(env);
@@ -156,6 +159,9 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
       }
       bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
       DCHECK(resumed);
+    } else if (timed_out) {
+      LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
+                    "failed to suspend within a generous timeout.";
     }
   }
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index f20cd281e8..081ec2043a 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -59,6 +59,7 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
     trace = Thread::InternalStackTraceToStackTraceElementArray(soa, internal_trace);
   } else {
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
+    bool timed_out;

     // Check for valid thread
     if (thin_lock_id == ThreadList::kInvalidThreadId) {
@@ -66,7 +67,9 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
     }

     // Suspend thread to build stack trace.
-    Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, SuspendReason::kInternal);
+    Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id,
+                                                          SuspendReason::kInternal,
+                                                          &timed_out);
     if (thread != nullptr) {
       {
         ScopedObjectAccess soa(env);
@@ -76,6 +79,11 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
       // Restart suspended thread.
       bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
       DCHECK(resumed);
+    } else {
+      if (timed_out) {
+        LOG(ERROR) << "Trying to get thread's stack by id failed as the thread failed to suspend "
+                      "within a generous timeout.";
+      }
+    }
   }
   return trace;
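The three native methods above share the same shape after this revert: suspend a peer thread with a timeout flag, act on it, and pair every successful suspension with a `Resume()`. A condensed sketch of that pattern, using only the calls shown in the hunks above — the `DoSuspendedWork` helper itself is hypothetical, not an ART API:

```cpp
// Suspends the thread identified by `peer`, runs `fn` on it while it is
// suspended, then resumes it. Returns false if the thread could not be
// suspended (it exited, or it failed to suspend before the timeout).
template <typename Fn>
bool DoSuspendedWork(ThreadList* thread_list, jobject peer, Fn&& fn) {
  bool timed_out = false;
  Thread* thread = thread_list->SuspendThreadByPeer(peer,
                                                    SuspendReason::kInternal,
                                                    &timed_out);
  if (thread == nullptr) {
    // nullptr with timed_out == false means the thread is gone;
    // timed_out == true means it never reached a suspended state.
    return false;
  }
  fn(thread);  // Runs while the target is suspended.
  bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
  DCHECK(resumed);
  return true;
}
```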
diff --git a/runtime/oat.h b/runtime/oat.h
index 2849b4e43c..20d32cbbb9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: Change suspend barrier data structure.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '3', '0', '\0' } };
+  // Last oat version changed reason: Add a new QuickEntryPoint for a String constructor.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '2', '9', '\0' } };

   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 5d45c599fe..4110ed2851 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -27,6 +27,7 @@
 #include "jni/jni_env_ext.h"
 #include "managed_stack-inl.h"
 #include "obj_ptr.h"
+#include "suspend_reason.h"
 #include "thread-current-inl.h"
 #include "thread_pool.h"

@@ -221,7 +222,7 @@ inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state
   }
 }

-inline void Thread::CheckActiveSuspendBarriers() {
+inline void Thread::PassActiveSuspendBarriers() {
   while (true) {
     StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
     if (LIKELY(!state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) &&
@@ -229,7 +230,7 @@ inline void Thread::PassActiveSuspendBarriers() {
                !state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier))) {
       break;
     } else if (state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) {
-      PassActiveSuspendBarriers();
+      PassActiveSuspendBarriers(this);
     } else {
       // Impossible
       LOG(FATAL) << "Fatal, thread transitioned into suspended without running the checkpoint";
@@ -237,19 +238,6 @@ inline void Thread::PassActiveSuspendBarriers() {
   }
 }

-inline void Thread::AddSuspend1Barrier(WrappedSuspend1Barrier* suspend1_barrier) {
-  suspend1_barrier->next_ = tlsPtr_.active_suspend1_barriers;
-  tlsPtr_.active_suspend1_barriers = suspend1_barrier;
-}
-
-inline void Thread::RemoveFirstSuspend1Barrier() {
-  tlsPtr_.active_suspend1_barriers = tlsPtr_.active_suspend1_barriers->next_;
-}
-
-inline bool Thread::HasActiveSuspendBarrier() {
-  return tlsPtr_.active_suspend1_barriers != nullptr || tlsPtr_.active_suspendall_barrier != nullptr;
-}
-
 inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
   // Note: JNI stubs inline a fast path of this method that transitions to suspended if
   // there are no flags set and then clears the `held_mutexes[kMutatorLock]` (this comes
@@ -265,7 +253,7 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
   // Mark the release of the share of the mutator lock.
   GetMutatorLock()->TransitionFromRunnableToSuspended(this);
   // Once suspended - check the active suspend barrier flag
-  CheckActiveSuspendBarriers();
+  PassActiveSuspendBarriers();
 }

 inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
@@ -296,7 +284,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
         break;
       }
     } else if (old_state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) {
-      PassActiveSuspendBarriers();
+      PassActiveSuspendBarriers(this);
     } else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) ||
                         old_state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest))) {
       // Checkpoint flags should not be set while in suspended state.
@@ -436,80 +424,35 @@ inline void Thread::PoisonObjectPointersIfDebug() {
   }
 }

-inline void Thread::IncrementSuspendCount(Thread* self,
-                                          AtomicInteger* suspendall_barrier,
-                                          WrappedSuspend1Barrier* suspend1_barrier,
-                                          SuspendReason reason) {
-  if (kIsDebugBuild) {
-    Locks::thread_suspend_count_lock_->AssertHeld(self);
-    if (this != self && !IsSuspended()) {
-      Locks::thread_list_lock_->AssertHeld(self);
-    }
-  }
-  if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
-    Locks::user_code_suspension_lock_->AssertHeld(self);
-  }
-
-  // Avoid deadlocks during a thread flip. b/31683379. This condition is not necessary for
-  // RunCheckpoint, but it does not use a suspend barrier.
-  DCHECK(!gUseReadBarrier
-         || this == self
-         || (suspendall_barrier == nullptr && suspend1_barrier == nullptr)
-         || GetFlipFunction() == nullptr);
-
-  uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest);
-  if (suspendall_barrier != nullptr) {
-    DCHECK(suspend1_barrier == nullptr);
-    DCHECK(tlsPtr_.active_suspendall_barrier == nullptr);
-    tlsPtr_.active_suspendall_barrier = suspendall_barrier;
-    flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
-  } else if (suspend1_barrier != nullptr) {
-    AddSuspend1Barrier(suspend1_barrier);
-    flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
-  }
-
-  ++tls32_.suspend_count;
-  if (reason == SuspendReason::kForUserCode) {
-    ++tls32_.user_code_suspend_count;
-  }
-
-  // Two bits might be set simultaneously.
-  tls32_.state_and_flags.fetch_or(flags, std::memory_order_seq_cst);
-  TriggerSuspend();
-}
-
-inline void Thread::DecrementSuspendCount(Thread* self, SuspendReason reason) {
-  if (kIsDebugBuild) {
-    Locks::thread_suspend_count_lock_->AssertHeld(self);
-    if (this != self && !IsSuspended()) {
-      Locks::thread_list_lock_->AssertHeld(self);
-    }
-  }
-  if (UNLIKELY(tls32_.suspend_count <= 0)) {
-    UnsafeLogFatalForSuspendCount(self, this);
-    UNREACHABLE();
-  }
-  if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
-    Locks::user_code_suspension_lock_->AssertHeld(self);
-    if (UNLIKELY(tls32_.user_code_suspend_count <= 0)) {
-      LOG(ERROR) << "user_code_suspend_count incorrect";
-      UnsafeLogFatalForSuspendCount(self, this);
-      UNREACHABLE();
-    }
-  }
-
-  --tls32_.suspend_count;
-  if (reason == SuspendReason::kForUserCode) {
-    --tls32_.user_code_suspend_count;
-  }
-
-  if (tls32_.suspend_count == 0) {
-    AtomicClearFlag(ThreadFlag::kSuspendRequest);
-  }
-}
-
-inline void Thread::IncrementSuspendCount(Thread* self) {
-  IncrementSuspendCount(self, nullptr, nullptr, SuspendReason::kInternal);
+inline bool Thread::ModifySuspendCount(Thread* self,
+                                       int delta,
+                                       AtomicInteger* suspend_barrier,
+                                       SuspendReason reason) {
+  if (delta > 0 && ((gUseReadBarrier && this != self) || suspend_barrier != nullptr)) {
+    // When delta > 0 (requesting a suspend), ModifySuspendCountInternal() may fail either if
+    // active_suspend_barriers is full or we are in the middle of a thread flip. Retry in a loop.
+    while (true) {
+      if (LIKELY(ModifySuspendCountInternal(self, delta, suspend_barrier, reason))) {
+        return true;
+      } else {
+        // Failure means the list of active_suspend_barriers is full or we are in the middle of a
+        // thread flip, we should release the thread_suspend_count_lock_ (to avoid deadlock) and
+        // wait till the target thread has executed or Thread::PassActiveSuspendBarriers() or the
+        // flip function. Note that we could not simply wait for the thread to change to a suspended
+        // state, because it might need to run checkpoint function before the state change or
+        // resumes from the resume_cond_, which also needs thread_suspend_count_lock_.
+        //
+        // The list of active_suspend_barriers is very unlikely to be full since more than
+        // kMaxSuspendBarriers threads need to execute SuspendAllInternal() simultaneously, and
+        // target thread stays in kRunnable in the mean time.
+        Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
+        NanoSleep(100000);
+        Locks::thread_suspend_count_lock_->ExclusiveLock(self);
+      }
+    }
+  } else {
+    return ModifySuspendCountInternal(self, delta, suspend_barrier, reason);
+  }
 }

 inline ShadowFrame* Thread::PushShadowFrame(ShadowFrame* new_top_frame) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 14ddf34f4a..ad6b8c6118 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1443,8 +1443,7 @@ uint64_t Thread::GetCpuMicroTime() const {
 }

 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
-void Thread::UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread)
-    NO_THREAD_SAFETY_ANALYSIS {
+static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
   LOG(ERROR) << *thread << " suspend count already zero.";
   Locks::thread_suspend_count_lock_->Unlock(self);
   if (!Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -1462,51 +1461,141 @@ uint64_t Thread::GetCpuMicroTime() const {
   std::ostringstream ss;
   Runtime::Current()->GetThreadList()->Dump(ss);
   LOG(FATAL) << ss.str();
-  UNREACHABLE();
 }

-bool Thread::PassActiveSuspendBarriers() {
-  DCHECK_EQ(this, Thread::Current());
-  // Grab the suspend_count lock and copy the current set of barriers. Then clear the list and the
-  // flag. The IncrementSuspendCount function requires the lock so we prevent a race between setting
+bool Thread::ModifySuspendCountInternal(Thread* self,
+                                        int delta,
+                                        AtomicInteger* suspend_barrier,
+                                        SuspendReason reason) {
+  if (kIsDebugBuild) {
+    DCHECK(delta == -1 || delta == +1)
+        << reason << " " << delta << " " << this;
+    Locks::thread_suspend_count_lock_->AssertHeld(self);
+    if (this != self && !IsSuspended()) {
+      Locks::thread_list_lock_->AssertHeld(self);
+    }
+  }
+  // User code suspensions need to be checked more closely since they originate from code outside of
+  // the runtime's control.
+  if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
+    Locks::user_code_suspension_lock_->AssertHeld(self);
+    if (UNLIKELY(delta + tls32_.user_code_suspend_count < 0)) {
+      LOG(ERROR) << "attempting to modify suspend count in an illegal way.";
+      return false;
+    }
+  }
+  if (UNLIKELY(delta < 0 && tls32_.suspend_count <= 0)) {
+    UnsafeLogFatalForSuspendCount(self, this);
+    return false;
+  }
+
+  if (gUseReadBarrier && delta > 0 && this != self && tlsPtr_.flip_function != nullptr) {
+    // Force retry of a suspend request if it's in the middle of a thread flip to avoid a
+    // deadlock. b/31683379.
+    return false;
+  }
+
+  uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest);
+  if (delta > 0 && suspend_barrier != nullptr) {
+    uint32_t available_barrier = kMaxSuspendBarriers;
+    for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+      if (tlsPtr_.active_suspend_barriers[i] == nullptr) {
+        available_barrier = i;
+        break;
+      }
+    }
+    if (available_barrier == kMaxSuspendBarriers) {
+      // No barrier spaces available, we can't add another.
+      return false;
+    }
+    tlsPtr_.active_suspend_barriers[available_barrier] = suspend_barrier;
+    flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
+  }
+
+  tls32_.suspend_count += delta;
+  switch (reason) {
+    case SuspendReason::kForUserCode:
+      tls32_.user_code_suspend_count += delta;
+      break;
+    case SuspendReason::kInternal:
+      break;
+  }
+
+  if (tls32_.suspend_count == 0) {
+    AtomicClearFlag(ThreadFlag::kSuspendRequest);
+  } else {
+    // Two bits might be set simultaneously.
+    tls32_.state_and_flags.fetch_or(flags, std::memory_order_seq_cst);
+    TriggerSuspend();
+  }
+  return true;
+}
+
+bool Thread::PassActiveSuspendBarriers(Thread* self) {
+  // Grab the suspend_count lock and copy the current set of
+  // barriers. Then clear the list and the flag. The ModifySuspendCount
+  // function requires the lock so we prevent a race between setting
   // the kActiveSuspendBarrier flag and clearing it.
-  std::vector<AtomicInteger*> pass_barriers{};
+  AtomicInteger* pass_barriers[kMaxSuspendBarriers];
   {
-    MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier)) {
-      // quick exit test: the barriers have already been claimed - this is possible as there may
-      // be a race to claim and it doesn't matter who wins.
-      // All of the callers of this function (except the SuspendAllInternal) will first test the
-      // kActiveSuspendBarrier flag without lock. Here double-check whether the barrier has been
-      // passed with the suspend_count lock.
+      // quick exit test: the barriers have already been claimed - this is
+      // possible as there may be a race to claim and it doesn't matter
+      // who wins.
+      // All of the callers of this function (except the SuspendAllInternal)
+      // will first test the kActiveSuspendBarrier flag without lock. Here
+      // double-check whether the barrier has been passed with the
+      // suspend_count lock.
       return false;
     }
-    if (tlsPtr_.active_suspendall_barrier != nullptr) {
-      pass_barriers.push_back(tlsPtr_.active_suspendall_barrier);
-      tlsPtr_.active_suspendall_barrier = nullptr;
-    }
-    for (WrappedSuspend1Barrier* w = tlsPtr_.active_suspend1_barriers; w != nullptr; w = w->next_) {
-      pass_barriers.push_back(&(w->barrier_));
+
+    for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+      pass_barriers[i] = tlsPtr_.active_suspend_barriers[i];
+      tlsPtr_.active_suspend_barriers[i] = nullptr;
     }
-    tlsPtr_.active_suspend1_barriers = nullptr;
     AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
   }

   uint32_t barrier_count = 0;
-  for (AtomicInteger* barrier : pass_barriers) {
-    ++barrier_count;
-    int32_t old_val = barrier->fetch_sub(1, std::memory_order_release);
-    CHECK_GT(old_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << old_val;
+  for (uint32_t i = 0; i < kMaxSuspendBarriers; i++) {
+    AtomicInteger* pending_threads = pass_barriers[i];
+    if (pending_threads != nullptr) {
+      bool done = false;
+      do {
+        int32_t cur_val = pending_threads->load(std::memory_order_relaxed);
+        CHECK_GT(cur_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << cur_val;
+        // Reduce value by 1.
+        done = pending_threads->CompareAndSetWeakRelaxed(cur_val, cur_val - 1);
 #if ART_USE_FUTEXES
-    if (old_val == 1) {
-      futex(barrier->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
-    }
+        if (done && (cur_val - 1) == 0) {  // Weak CAS may fail spuriously.
+          futex(pending_threads->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
+        }
 #endif
+      } while (!done);
+      ++barrier_count;
+    }
   }
   CHECK_GT(barrier_count, 0U);
   return true;
 }

+void Thread::ClearSuspendBarrier(AtomicInteger* target) {
+  CHECK(ReadFlag(ThreadFlag::kActiveSuspendBarrier));
+  bool clear_flag = true;
+  for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+    AtomicInteger* ptr = tlsPtr_.active_suspend_barriers[i];
+    if (ptr == target) {
+      tlsPtr_.active_suspend_barriers[i] = nullptr;
+    } else if (ptr != nullptr) {
+      clear_flag = false;
+    }
+  }
+  if (LIKELY(clear_flag)) {
+    AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+  }
+}
+
 void Thread::RunCheckpointFunction() {
   // If this thread is suspended and another thread is running the checkpoint on its behalf,
   // we may have a pending flip function that we need to run for the sake of those checkpoints
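`PassActiveSuspendBarriers()` above is the wake side of the handshake; the matching wait lives in `ThreadList::SuspendAllInternal()`, whose diff is not shown on this page (the thread_list.cc hunks are truncated). A rough sketch of what that waiting side looks like under `ART_USE_FUTEXES`, assuming Linux and a plain atomic counter — this is a reconstruction for illustration, not the actual ART code, which also applies a timeout:

```cpp
#include <atomic>
#include <cstdint>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

// Thin futex-wait wrapper; glibc exposes futex only via syscall().
static long FutexWait(std::atomic<int32_t>* addr, int32_t expected) {
  return syscall(SYS_futex, reinterpret_cast<int32_t*>(addr),
                 FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0);
}

// Block until every suspending thread has decremented the barrier to zero.
void WaitForSuspendBarrier(std::atomic<int32_t>* pending_threads) {
  int32_t cur = pending_threads->load(std::memory_order_acquire);
  while (cur != 0) {
    // Sleeps until a FUTEX_WAKE_PRIVATE from the decrementing side; if the
    // value no longer equals `cur`, the call returns immediately instead.
    FutexWait(pending_threads, cur);
    cur = pending_threads->load(std::memory_order_acquire);
  }
}
```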
@@ -1558,25 +1647,28 @@ void Thread::RunEmptyCheckpoint() {
 }

 bool Thread::RequestCheckpoint(Closure* function) {
-  StateAndFlags old_state_and_flags(0 /* unused */), new_state_and_flags(0 /* unused */);
-  do {
-    old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
-    if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
-      return false;  // Fail, thread is suspended and so can't run a checkpoint.
-    }
-    new_state_and_flags = old_state_and_flags;
-    new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
-  } while (!tls32_.state_and_flags.CompareAndSetWeakSequentiallyConsistent(
-      old_state_and_flags.GetValue(), new_state_and_flags.GetValue()));
-  // Succeeded setting checkpoint flag, now insert the actual checkpoint.
-  if (tlsPtr_.checkpoint_function == nullptr) {
-    tlsPtr_.checkpoint_function = function;
-  } else {
-    checkpoint_overflow_.push_back(function);
+  StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+  if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
+    return false;  // Fail, thread is suspended and so can't run a checkpoint.
   }
-  DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
-  TriggerSuspend();
-  return true;
+
+  // We must be runnable to request a checkpoint.
+  DCHECK_EQ(old_state_and_flags.GetState(), ThreadState::kRunnable);
+  StateAndFlags new_state_and_flags = old_state_and_flags;
+  new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
+  bool success = tls32_.state_and_flags.CompareAndSetStrongSequentiallyConsistent(
+      old_state_and_flags.GetValue(), new_state_and_flags.GetValue());
+  if (success) {
+    // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+    if (tlsPtr_.checkpoint_function == nullptr) {
+      tlsPtr_.checkpoint_function = function;
+    } else {
+      checkpoint_overflow_.push_back(function);
+    }
+    CHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
+    TriggerSuspend();
+  }
+  return success;
 }

 bool Thread::RequestEmptyCheckpoint() {
@@ -1655,85 +1747,85 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
     Thread* self_thread;
   };

-  Locks::thread_list_lock_->AssertExclusiveHeld(self);
-  // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
-  // suspend-count lock for too long.
-  if (GetState() == ThreadState::kRunnable) {
-    BarrierClosure barrier_closure(function);
-    bool installed = false;
-    {
-      MutexLock mu(self, *Locks::thread_suspend_count_lock_);
-      installed = RequestCheckpoint(&barrier_closure);
-    }
-    if (installed) {
-      // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
-      // reacquire it since we don't know if 'this' hasn't been deleted yet.
-      Locks::thread_list_lock_->ExclusiveUnlock(self);
-      ScopedThreadStateChange sts(self, suspend_state);
-      barrier_closure.Wait(self, suspend_state);
-      return true;
+  for (;;) {
+    Locks::thread_list_lock_->AssertExclusiveHeld(self);
+    // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
+    // suspend-count lock for too long.
+    if (GetState() == ThreadState::kRunnable) {
+      BarrierClosure barrier_closure(function);
+      bool installed = false;
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        installed = RequestCheckpoint(&barrier_closure);
+      }
+      if (installed) {
+        // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
+        // reacquire it since we don't know if 'this' hasn't been deleted yet.
+        Locks::thread_list_lock_->ExclusiveUnlock(self);
+        ScopedThreadStateChange sts(self, suspend_state);
+        barrier_closure.Wait(self, suspend_state);
+        return true;
+      }
+      // Fall-through.
     }
-    // No longer runnable. Fall-through.
-  }

-  // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
-  // Note: IncrementSuspendCount also expects the thread_list_lock to be held in
-  // certain situations.
-  while (true) {
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
+    // Note: ModifySuspendCountInternal also expects the thread_list_lock to be held in
+    // certain situations.
+    {
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);

-    if (GetFlipFunction() == nullptr) {
-      IncrementSuspendCount(self);
-      break;
+      if (!ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal)) {
+        // Just retry the loop.
+        sched_yield();
+        continue;
+      }
     }
-    DCHECK(gUseReadBarrier);  // Flip functions are only used by CC collector.
-    sched_yield();
-  }

-  {
-    // Release for the wait. The suspension will keep the target thread from being unregistered
-    // and deleted. Reacquire after so that we can call DecrementSuspendCount without racing
-    // against ThreadList::Unregister.
-    ScopedThreadListLockUnlock stllu(self);
     {
-      ScopedThreadStateChange sts(self, suspend_state);
-      while (GetState() == ThreadState::kRunnable) {
-        // The target thread became runnable again. Wait till the suspend triggered in
-        // IncrementSuspendCount moves the target thread to suspended.
-        sched_yield();
+      // Release for the wait. The suspension will keep us from being deleted. Reacquire after so
+      // that we can call ModifySuspendCount without racing against ThreadList::Unregister.
+      ScopedThreadListLockUnlock stllu(self);
+      {
+        ScopedThreadStateChange sts(self, suspend_state);
+        while (GetState() == ThreadState::kRunnable) {
+          // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
+          // moves us to suspended.
+          sched_yield();
+        }
       }
-    }
-    function->Run(this);
-  }

-  {
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+      function->Run(this);
+    }

-    // Target was in a suspended state with a nonzero suspend count, so it must stay suspended.
-    DCHECK_NE(GetState(), ThreadState::kRunnable);
-    DecrementSuspendCount(self);
-  }
+    {
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);

-  {
-    // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
-    // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-    Thread::resume_cond_->Broadcast(self);
-  }
+      DCHECK_NE(GetState(), ThreadState::kRunnable);
+      bool updated = ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
+    }

-  // Release the thread_list_lock_ to be consistent with the barrier-closure path.
-  Locks::thread_list_lock_->ExclusiveUnlock(self);
+    {
+      // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
+      // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+      Thread::resume_cond_->Broadcast(self);
+    }

-  return true;  // We're done, break out of the loop.
+    // Release the thread_list_lock_ to be consistent with the barrier-closure path.
+    Locks::thread_list_lock_->ExclusiveUnlock(self);
+
+    return true;  // We're done, break out of the loop.
+  }
 }

 void Thread::SetFlipFunction(Closure* function) {
   // This is called with all threads suspended, except for the calling thread.
   DCHECK(IsSuspended() || Thread::Current() == this);
   DCHECK(function != nullptr);
-  DCHECK(GetFlipFunction() == nullptr);
-  tlsPtr_.flip_function.store(function, std::memory_order_relaxed);
+  DCHECK(tlsPtr_.flip_function == nullptr);
+  tlsPtr_.flip_function = function;
   DCHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
   AtomicSetFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_release);
 }
@@ -1765,8 +1857,8 @@ void Thread::RunFlipFunction(Thread* self, bool notify) {
   // no other thread may run checkpoints on a thread that's actually Runnable.
   DCHECK_EQ(notify, ReadFlag(ThreadFlag::kRunningFlipFunction));

-  Closure* flip_function = GetFlipFunction();
-  tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+  Closure* flip_function = tlsPtr_.flip_function;
+  tlsPtr_.flip_function = nullptr;
   DCHECK(flip_function != nullptr);
   flip_function->Run(this);
@@ -2134,26 +2226,19 @@ struct StackDumpVisitor : public MonitorObjectsStackVisitor {
     if (obj == nullptr) {
       os << msg << "an unknown object";
     } else {
-      const std::string pretty_type(obj->PrettyTypeOf());
-      // It's often unsafe to allow lock inflation here. We may be the only runnable thread, or
-      // this may be called from a checkpoint. We get the hashcode on a best effort basis.
-      static constexpr int kNumRetries = 3;
-      static constexpr int kSleepMicros = 10;
-      int32_t hash_code;
-      for (int i = 0;; ++i) {
-        hash_code = obj->IdentityHashCodeNoInflation();
-        if (hash_code != 0 || i == kNumRetries) {
-          break;
-        }
-        usleep(kSleepMicros);
-      }
-      if (hash_code == 0) {
+      if ((obj->GetLockWord(true).GetState() == LockWord::kThinLocked) &&
+          Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) {
+        // Getting the identity hashcode here would result in lock inflation and suspension of the
+        // current thread, which isn't safe if this is the only runnable thread.
         os << msg << StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)",
                                   reinterpret_cast<intptr_t>(obj.Ptr()),
-                                  pretty_type.c_str());
+                                  obj->PrettyTypeOf().c_str());
       } else {
-        // - waiting on <0x608c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
-        os << msg << StringPrintf("<0x%08x> (a %s)", hash_code, pretty_type.c_str());
+        // - waiting on <0x6008c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
+        // Call PrettyTypeOf before IdentityHashCode since IdentityHashCode can cause thread
+        // suspension and move pretty_object.
+        const std::string pretty_type(obj->PrettyTypeOf());
+        os << msg << StringPrintf("<0x%08x> (a %s)", obj->IdentityHashCode(), pretty_type.c_str());
       }
     }
     if (owner_tid != ThreadList::kInvalidThreadId) {
@@ -2385,9 +2470,10 @@ Thread::Thread(bool daemon)
   tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
                    gc::allocator::RosAlloc::GetDedicatedFullRun());
   tlsPtr_.checkpoint_function = nullptr;
-  tlsPtr_.active_suspendall_barrier = nullptr;
-  tlsPtr_.active_suspend1_barriers = nullptr;
-  tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+  for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+    tlsPtr_.active_suspend_barriers[i] = nullptr;
+  }
+  tlsPtr_.flip_function = nullptr;
   tlsPtr_.thread_local_mark_stack = nullptr;
   tls32_.is_transitioning_to_runnable = false;
   ResetTlab();
@@ -2535,7 +2621,7 @@ Thread::~Thread() {
   CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest));
   CHECK(tlsPtr_.checkpoint_function == nullptr);
   CHECK_EQ(checkpoint_overflow_.size(), 0u);
-  CHECK(GetFlipFunction() == nullptr);
+  CHECK(tlsPtr_.flip_function == nullptr);
   CHECK_EQ(tls32_.is_transitioning_to_runnable, false);

   // Make sure we processed all deoptimization requests.
diff --git a/runtime/thread.h b/runtime/thread.h
index 044e9dbd14..dda4c086f4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -47,7 +47,6 @@
 #include "reflective_handle_scope.h"
 #include "runtime_globals.h"
 #include "runtime_stats.h"
-#include "suspend_reason.h"
 #include "thread_state.h"

 namespace unwindstack {
@@ -103,6 +102,7 @@ class RootVisitor;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class StackedShadowFrameRecord;
+enum class SuspendReason : char;
 class Thread;
 class ThreadList;
 enum VisitRootFlags : uint8_t;
@@ -134,7 +134,6 @@ enum class ThreadFlag : uint32_t {
   kEmptyCheckpointRequest = 1u << 2,

   // Register that at least 1 suspend barrier needs to be passed.
-  // Changes to this flag are guarded by suspend_count_lock_ .
   kActiveSuspendBarrier = 1u << 3,

   // Marks that a "flip function" needs to be executed on this thread.
@@ -142,7 +141,7 @@ enum class ThreadFlag : uint32_t {
   // Marks that the "flip function" is being executed by another thread.
   //
-  // This is used to guard against multiple threads trying to run the
+  // This is used to guards against multiple threads trying to run the
   // "flip function" for the same thread while the thread is suspended.
   //
   // This is not needed when the thread is running the flip function
@@ -189,13 +188,6 @@ enum class WeakRefAccessState : int32_t {
   kDisabled
 };

-// See Thread.tlsPtr_.active_suspend1_barriers below for explanation.
-struct WrappedSuspend1Barrier {
-  WrappedSuspend1Barrier() : barrier_(1), next_(nullptr) {}
-  AtomicInteger barrier_;
-  struct WrappedSuspend1Barrier* next_ GUARDED_BY(Locks::thread_suspend_count_lock_);
-};
-
 // This should match RosAlloc::kNumThreadLocalSizeBrackets.
 static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16;

@@ -323,7 +315,7 @@ class Thread {
   }

   bool IsSuspended() const {
-    StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_acquire);
+    StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
     return state_and_flags.GetState() != ThreadState::kRunnable &&
         state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest);
   }
@@ -339,32 +331,20 @@ class Thread {
     return tls32_.define_class_counter;
   }

-  // Increment suspend count and optionally install at most one suspend barrier.
-  // Should not be invoked on another thread while the target's flip_function is not null.
+  // If delta > 0 and (this != self or suspend_barrier is not null), this function may temporarily
+  // release thread_suspend_count_lock_ internally.
   ALWAYS_INLINE
-  void IncrementSuspendCount(Thread* self,
-                             AtomicInteger* suspend_all_barrier,
-                             WrappedSuspend1Barrier* suspend1_barrier,
-                             SuspendReason reason)
+  bool ModifySuspendCount(Thread* self,
+                          int delta,
+                          AtomicInteger* suspend_barrier,
+                          SuspendReason reason)
+      WARN_UNUSED
       REQUIRES(Locks::thread_suspend_count_lock_);

-  // The same, but default reason to kInternal, and barriers to nullptr.
-  ALWAYS_INLINE void IncrementSuspendCount(Thread* self)
-      REQUIRES(Locks::thread_suspend_count_lock_);
-
-  ALWAYS_INLINE void DecrementSuspendCount(Thread* self,
-                                           SuspendReason reason = SuspendReason::kInternal)
-      REQUIRES(Locks::thread_suspend_count_lock_);
-
- private:
-  NO_RETURN static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread);
-
- public:
   // Requests a checkpoint closure to run on another thread. The closure will be run when the
   // thread notices the request, either in an explicit runtime CheckSuspend() call, or in a call
   // originating from a compiler generated suspend point check. This returns true if the closure
-  // was added and will (eventually) be executed. It returns false if the thread was
-  // either suspended or in the event of a spurious WeakCAS failure.
+  // was added and will (eventually) be executed. It returns false otherwise.
   //
   // Since multiple closures can be queued and some closures can delay other threads from running,
   // no closure should attempt to suspend another thread while running.
@@ -394,14 +374,8 @@ class Thread {
   bool RequestEmptyCheckpoint()
       REQUIRES(Locks::thread_suspend_count_lock_);

-  Closure* GetFlipFunction() {
-    return tlsPtr_.flip_function.load(std::memory_order_relaxed);
-  }
-
-  // Set the flip function. This is done with all threads suspended, except for the calling thread.
-  void SetFlipFunction(Closure* function)
-      REQUIRES(Locks::thread_suspend_count_lock_)
-      REQUIRES(Locks::thread_list_lock_);
+  void SetFlipFunction(Closure* function);

   // Ensure that thread flip function started running. If no other thread is executing
   // it, the calling thread shall run the flip function and then notify other threads
@@ -1242,27 +1216,8 @@ class Thread {
     tlsPtr_.held_mutexes[level] = mutex;
   }

-  // Possibly check that no mutexes at level kMonitorLock or above are subsequently acquired.
-  // Only invoked by the thread itself.
-  void DisallowPreMonitorMutexes() {
-    if (kIsDebugBuild) {
-      CHECK(this == Thread::Current());
-      CHECK(GetHeldMutex(kMonitorLock) == nullptr);
-      // Pretend we hold a kMonitorLock level mutex to detect disallowed mutex
-      // acquisitions by checkpoint Run() methods. We don't normally register or thus check
-      // kMonitorLock level mutexes, but this is an exception.
-      static Mutex dummy_mutex("checkpoint dummy mutex", kMonitorLock);
-      SetHeldMutex(kMonitorLock, &dummy_mutex);
-    }
-  }
-
-  // Undo the effect of the previous call. Again only invoked by the thread itself.
-  void AllowPreMonitorMutexes() {
-    if (kIsDebugBuild) {
-      CHECK(GetHeldMutex(kMonitorLock) != nullptr);
-      SetHeldMutex(kMonitorLock, nullptr);
-    }
-  }
+  void ClearSuspendBarrier(AtomicInteger* target)
+      REQUIRES(Locks::thread_suspend_count_lock_);

   bool ReadFlag(ThreadFlag flag) const {
     return GetStateAndFlags(std::memory_order_relaxed).IsFlagSet(flag);
   }
@@ -1510,7 +1465,6 @@ class Thread {
  private:
   explicit Thread(bool daemon);

-  // A runnable thread should only be deleted from the thread itself.
   ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);

   void Destroy();
@@ -1538,8 +1492,7 @@ class Thread {
   // Avoid use, callers should use SetState.
   // Used only by `Thread` destructor and stack trace collection in semi-space GC (currently
-  // disabled by `kStoreStackTraces = false`). May not be called on a runnable thread other
-  // than Thread::Current().
+  // disabled by `kStoreStackTraces = false`).
   // NO_THREAD_SAFETY_ANALYSIS: This function is "Unsafe" and can be called in
   // different states, so clang cannot perform the thread safety analysis.
   ThreadState SetStateUnsafe(ThreadState new_state) NO_THREAD_SAFETY_ANALYSIS {
@@ -1548,13 +1501,12 @@
     if (old_state == new_state) {
       // Nothing to do.
     } else if (old_state == ThreadState::kRunnable) {
-      DCHECK_EQ(this, Thread::Current());
       // Need to run pending checkpoint and suspend barriers. Run checkpoints in runnable state in
       // case they need to use a ScopedObjectAccess. If we are holding the mutator lock and a SOA
       // attempts to TransitionFromSuspendedToRunnable, it results in a deadlock.
       TransitionToSuspendedAndRunCheckpoints(new_state);
       // Since we transitioned to a suspended state, check the pass barrier requests.
-      CheckActiveSuspendBarriers();
+      PassActiveSuspendBarriers();
     } else {
       while (true) {
         StateAndFlags new_state_and_flags = old_state_and_flags;
@@ -1627,26 +1579,9 @@ class Thread {
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_)
       REQUIRES_SHARED(Locks::mutator_lock_);

-  // Call PassActiveSuspendBarriers() if there are active barriers. Only called on current thread.
-  ALWAYS_INLINE void CheckActiveSuspendBarriers()
+  ALWAYS_INLINE void PassActiveSuspendBarriers()
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);

-  // Decrement all "suspend barriers" for the current thread, notifying threads that requested our
-  // suspension. Only called on current thread.
-  bool PassActiveSuspendBarriers()
-      REQUIRES(!Locks::thread_suspend_count_lock_);
-
-  // Add an entry to active_suspend1_barriers.
-  ALWAYS_INLINE void AddSuspend1Barrier(WrappedSuspend1Barrier* suspend1_barrier)
-      REQUIRES(Locks::thread_suspend_count_lock_);
-
-  // Remove last-added entry from active_suspend1_barriers.
-  // Only makes sense if we're still holding thread_suspend_count_lock_ since insertion.
-  ALWAYS_INLINE void RemoveFirstSuspend1Barrier()
-      REQUIRES(Locks::thread_suspend_count_lock_);
-
-  ALWAYS_INLINE bool HasActiveSuspendBarrier() REQUIRES(Locks::thread_suspend_count_lock_);
-
   // Registers the current thread as the jit sensitive thread. Should be called just once.
   static void SetJitSensitiveThread() {
     if (jit_sensitive_thread_ == nullptr) {
@@ -1661,6 +1596,13 @@ class Thread {
     is_sensitive_thread_hook_ = is_sensitive_thread_hook;
   }

+  bool ModifySuspendCountInternal(Thread* self,
+                                  int delta,
+                                  AtomicInteger* suspend_barrier,
+                                  SuspendReason reason)
+      WARN_UNUSED
+      REQUIRES(Locks::thread_suspend_count_lock_);
+
   // Runs a single checkpoint function. If there are no more pending checkpoint functions it will
   // clear the kCheckpointRequest flag. The caller is responsible for calling this in a loop until
   // the kCheckpointRequest flag is cleared.
   void RunCheckpointFunction()
       REQUIRES(Locks::thread_suspend_count_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void RunEmptyCheckpoint();

+  bool PassActiveSuspendBarriers(Thread* self)
+      REQUIRES(!Locks::thread_suspend_count_lock_);
+
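The checkpoint machinery above comes with a contract: closures may be queued, a closure must never try to suspend another thread, and a requester that finds the target already suspended runs the closure on the target's behalf. A minimal sketch of that caller-side shape, with Closure and Thread as simplified stand-ins rather than the declarations from this header:

    // Simplified stand-ins for the runtime types, not the real declarations.
    struct Thread;
    struct Closure {
      virtual ~Closure() = default;
      // Contract: a checkpoint Run() must never try to suspend another thread.
      virtual void Run(Thread* thread) = 0;
    };

    // Caller-side shape: a false return from the request means the target was not
    // runnable, so the requester executes the closure on the target's behalf.
    void RunCheckpointOn(Thread* target,
                         Closure* closure,
                         bool (*request_checkpoint)(Thread*, Closure*)) {
      if (!request_checkpoint(target, closure)) {
        closure->Run(target);  // Target stays suspended; safe to act for it.
      }
    }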
   // Install the protected region for implicit stack checks.
   void InstallImplicitProtection();
@@ -1951,47 +1896,45 @@ class Thread {
   } tls64_;

   struct PACKED(sizeof(void*)) tls_ptr_sized_values {
-    tls_ptr_sized_values() : card_table(nullptr),
-                             exception(nullptr),
-                             stack_end(nullptr),
-                             managed_stack(),
-                             suspend_trigger(nullptr),
-                             jni_env(nullptr),
-                             tmp_jni_env(nullptr),
-                             self(nullptr),
-                             opeer(nullptr),
-                             jpeer(nullptr),
-                             stack_begin(nullptr),
-                             stack_size(0),
-                             deps_or_stack_trace_sample(),
-                             wait_next(nullptr),
-                             monitor_enter_object(nullptr),
-                             top_handle_scope(nullptr),
-                             class_loader_override(nullptr),
-                             long_jump_context(nullptr),
-                             instrumentation_stack(nullptr),
-                             stacked_shadow_frame_record(nullptr),
-                             deoptimization_context_stack(nullptr),
-                             frame_id_to_shadow_frame(nullptr),
-                             name(nullptr),
-                             pthread_self(0),
-                             last_no_thread_suspension_cause(nullptr),
-                             checkpoint_function(nullptr),
-                             active_suspendall_barrier(nullptr),
-                             active_suspend1_barriers(nullptr),
-                             thread_local_pos(nullptr),
-                             thread_local_end(nullptr),
-                             thread_local_start(nullptr),
-                             thread_local_limit(nullptr),
-                             thread_local_objects(0),
-                             thread_local_alloc_stack_top(nullptr),
-                             thread_local_alloc_stack_end(nullptr),
-                             mutator_lock(nullptr),
-                             flip_function(nullptr),
-                             method_verifier(nullptr),
-                             thread_local_mark_stack(nullptr),
-                             async_exception(nullptr),
-                             top_reflective_handle_scope(nullptr) {
+    tls_ptr_sized_values() : card_table(nullptr),
+                             exception(nullptr),
+                             stack_end(nullptr),
+                             managed_stack(),
+                             suspend_trigger(nullptr),
+                             jni_env(nullptr),
+                             tmp_jni_env(nullptr),
+                             self(nullptr),
+                             opeer(nullptr),
+                             jpeer(nullptr),
+                             stack_begin(nullptr),
+                             stack_size(0),
+                             deps_or_stack_trace_sample(),
+                             wait_next(nullptr),
+                             monitor_enter_object(nullptr),
+                             top_handle_scope(nullptr),
+                             class_loader_override(nullptr),
+                             long_jump_context(nullptr),
+                             instrumentation_stack(nullptr),
+                             stacked_shadow_frame_record(nullptr),
+                             deoptimization_context_stack(nullptr),
+                             frame_id_to_shadow_frame(nullptr),
+                             name(nullptr),
+                             pthread_self(0),
+                             last_no_thread_suspension_cause(nullptr),
+                             checkpoint_function(nullptr),
+                             thread_local_start(nullptr),
+                             thread_local_pos(nullptr),
+                             thread_local_end(nullptr),
+                             thread_local_limit(nullptr),
+                             thread_local_objects(0),
+                             thread_local_alloc_stack_top(nullptr),
+                             thread_local_alloc_stack_end(nullptr),
+                             mutator_lock(nullptr),
+                             flip_function(nullptr),
+                             method_verifier(nullptr),
+                             thread_local_mark_stack(nullptr),
+                             async_exception(nullptr),
+                             top_reflective_handle_scope(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
@@ -2101,30 +2044,20 @@ class Thread {
     // requests another checkpoint, it goes to the checkpoint overflow list.
     Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);

-    // After a thread observes a suspend request and enters a suspended state,
-    // it notifies the requestor by arriving at a "suspend barrier". This consists of decrementing
-    // the atomic integer representing the barrier. (This implementation was introduced in 2015 to
-    // minimize cost. There may be other options.) These atomic integer barriers are always
-    // stored on the requesting thread's stack. They are referenced from the target thread's
-    // data structure in one of two ways; in either case the data structure referring to these
-    // barriers is guarded by suspend_count_lock:
-    // 1. A SuspendAll barrier is directly referenced from the target thread. Only one of these
-    //    can be active at a time:
-    AtomicInteger* active_suspendall_barrier GUARDED_BY(Locks::thread_suspend_count_lock_);
-    // 2. For individual thread suspensions, active barriers are embedded in a struct that is used
-    //    to link together all suspend requests for this thread. Unlike the SuspendAll case, each
-    //    barrier is referenced by a single target thread, and thus can appear only on a single list.
-    //    The struct as a whole is still stored on the requesting thread's stack.
-    WrappedSuspend1Barrier* active_suspend1_barriers GUARDED_BY(Locks::thread_suspend_count_lock_);
+    // Pending barriers that require passing or NULL if non-pending. Installation guarded by
+    // Locks::thread_suspend_count_lock_.
+    // They work effectively as art::Barrier, but implemented directly using AtomicInteger and futex
+    // to avoid additional cost of a mutex and a condition variable, as used in art::Barrier.
+    AtomicInteger* active_suspend_barriers[kMaxSuspendBarriers];
+
+    // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM.
+    uint8_t* thread_local_start;

     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;

-    // Thread-local allocation pointer. Can be moved above the preceding two to correct alignment.
-    uint8_t* thread_local_start;
-
     // Thread local limit is how much we can expand the thread local buffer to, it is greater or
     // equal to thread_local_end.
     uint8_t* thread_local_limit;
@@ -2151,8 +2084,7 @@ class Thread {
     BaseMutex* held_mutexes[kLockLevelCount];

     // The function used for thread flip.
-    // Set only while holding Locks::thread_suspend_count_lock_ . May be cleared while being read.
-    std::atomic<Closure*> flip_function;
+    Closure* flip_function;

     // Current method verifier, used for root marking.
     verifier::MethodVerifier* method_verifier;
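The restored tls_ptr_sized_values comment above describes each suspend barrier as a bare AtomicInteger that suspending threads decrement, with a futex standing in for the mutex/condition-variable pair of art::Barrier. A rough standalone illustration of that countdown shape, using C++20 atomic wait/notify as a portable stand-in for the raw futex(2) call (CountDownBarrier is an illustrative name, not a type from this change):

    #include <atomic>
    #include <cstdint>

    // CountDownBarrier is an illustrative name. Each suspending thread calls
    // Pass() once it is safely suspended; the requesting thread calls Await().
    class CountDownBarrier {
     public:
      explicit CountDownBarrier(int32_t count) : pending_(count) {}

      void Pass() {
        // Decrement the counter; the last thread through wakes the waiter,
        // analogous to a FUTEX_WAKE on the counter's address.
        if (pending_.fetch_sub(1, std::memory_order_seq_cst) == 1) {
          pending_.notify_all();
        }
      }

      void Await() {
        int32_t cur = pending_.load(std::memory_order_acquire);
        while (cur > 0) {
          pending_.wait(cur);  // Analogous to FUTEX_WAIT while the value is cur.
          cur = pending_.load(std::memory_order_acquire);
        }
      }

     private:
      std::atomic<int32_t> pending_;
    };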
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 14fa31b77d..f36e922fa6 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -64,7 +64,10 @@ namespace art {
 using android::base::StringPrintf;

 static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
-static constexpr useconds_t kThreadSuspendSleepUs = 100;
+// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
+static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
+static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
+static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;

 // Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
 // some history.
 static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;

 ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
     : suspend_all_count_(0),
       unregistering_count_(0),
-      suspend_all_histogram_("suspend all histogram", 16, 64),
+      suspend_all_historam_("suspend all histogram", 16, 64),
       long_suspend_(false),
       shut_down_(false),
       thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
@@ -134,10 +137,10 @@ void ThreadList::DumpForSigQuit(std::ostream& os) {
   {
     ScopedObjectAccess soa(Thread::Current());
     // Only print if we have samples.
-    if (suspend_all_histogram_.SampleSize() > 0) {
+    if (suspend_all_historam_.SampleSize() > 0) {
       Histogram<uint64_t>::CumulativeData data;
-      suspend_all_histogram_.CreateHistogram(&data);
-      suspend_all_histogram_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
+      suspend_all_historam_.CreateHistogram(&data);
+      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
     }
   }
   bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
@@ -273,11 +276,11 @@ void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
   }
 }

-void ThreadList::AssertOtherThreadsAreSuspended(Thread* self) {
+void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
   MutexLock mu(self, *Locks::thread_list_lock_);
   MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   for (const auto& thread : list_) {
-    if (thread != self) {
+    if (thread != ignore1 && thread != ignore2) {
       CHECK(thread->IsSuspended())
             << "\nUnsuspended thread: <<" << *thread << "\n"
             << "self: <<" << *Thread::Current();
@@ -304,9 +307,20 @@ NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
 }
 #endif

-size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
-                                 Closure* callback,
-                                 bool allowLockChecking) {
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then
+// we use sched_yield instead of calling usleep.
+// Although there is the possibility, here and elsewhere, that usleep could return -1 and
+// errno = EINTR, there should be no problem if interrupted, so we do not check.
+static void ThreadSuspendSleep(useconds_t delay_us) {
+  if (delay_us == 0) {
+    sched_yield();
+  } else {
+    usleep(delay_us);
+  }
+}
+
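ThreadSuspendSleep() above is one half of the polling strategy; the suspend loops later in this file start at kThreadSuspendInitialSleepUs, spin with sched_yield() until kThreadSuspendMaxYieldUs has elapsed, then double the sleep up to kThreadSuspendMaxSleepUs. A standalone sketch of the combined shape, under illustrative names (PollWithBackoff, is_done) rather than anything defined in this change:

    #include <sched.h>     // sched_yield
    #include <unistd.h>    // usleep, useconds_t
    #include <algorithm>   // std::min
    #include <chrono>

    // Constants mirror kThreadSuspendMaxYieldUs / kThreadSuspendMaxSleepUs above.
    bool PollWithBackoff(bool (*is_done)(), std::chrono::nanoseconds budget) {
      using std::chrono::steady_clock;
      using std::chrono::microseconds;
      constexpr useconds_t kMaxYieldUs = 3000;   // Yield budget before sleeping.
      constexpr useconds_t kMaxSleepUs = 5000;   // Cap for the exponential backoff.
      useconds_t sleep_us = 0;                   // 0 means: yield rather than sleep.
      const steady_clock::time_point start = steady_clock::now();
      while (!is_done()) {
        const auto elapsed = steady_clock::now() - start;
        if (elapsed >= budget) {
          return false;                          // Caller reports the timeout.
        }
        if (sleep_us == 0 && elapsed > microseconds(kMaxYieldUs)) {
          sleep_us = kMaxYieldUs / 2;            // Yielded long enough; start sleeping.
        }
        if (sleep_us == 0) {
          sched_yield();
        } else {
          usleep(sleep_us);
          sleep_us = std::min(static_cast<useconds_t>(sleep_us * 2), kMaxSleepUs);
        }
      }
      return true;
    }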
+size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
@@ -318,9 +332,6 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
     // Call a checkpoint function for each thread, threads which are suspended get their checkpoint
     // manually called.
     MutexLock mu(self, *Locks::thread_list_lock_);
-    if (kIsDebugBuild && allowLockChecking) {
-      self->DisallowPreMonitorMutexes();
-    }
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     count = list_.size();
     for (const auto& thread : list_) {
@@ -331,23 +342,31 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
           // This thread will run its checkpoint some time in the near future.
           if (requested_suspend) {
             // The suspend request is now unnecessary.
-            thread->DecrementSuspendCount(self);
+            bool updated =
+                thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+            DCHECK(updated);
             requested_suspend = false;
           }
           break;
         } else {
-          // The thread is probably suspended.
+          // The thread is probably suspended, try to make sure that it stays suspended.
+          if (thread->GetState() == ThreadState::kRunnable) {
+            // Spurious fail, try again.
+            continue;
+          }
           if (!requested_suspend) {
-            // Since we don't suspend other threads to run checkpoint_function, we claim this is
-            // safe even with flip_function set.
-            // This does not risk suspension cycles: We may have a pending suspension request,
-            // but it cannot block us: Checkpoint Run() functions may not suspend, thus we cannot
-            // be blocked from decrementing the count again.
-            thread->IncrementSuspendCount(self);
+            bool updated =
+                thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+            DCHECK(updated);
             requested_suspend = true;
-          }
-          if (thread->IsSuspended()) {
-            // We saw it suspended after incrementing suspend count, so it will stay that way.
+            if (thread->IsSuspended()) {
+              break;
+            }
+            // The thread raced us to become Runnable. Try to RequestCheckpoint() again.
+          } else {
+            // The thread previously raced our suspend request to become Runnable but
+            // since it is suspended again, it must honor that suspend request now.
+            DCHECK(thread->IsSuspended());
             break;
           }
         }
@@ -356,8 +375,6 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
         suspended_count_modified_threads.push_back(thread);
       }
     }
-    // Thread either has honored or will honor the checkpoint, or it has been added to
-    // suspended_count_modified_threads.
   }

   // Run the callback to be called inside this critical section.
   if (callback != nullptr) {
@@ -375,7 +392,8 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
     checkpoint_function->Run(thread);
     {
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-      thread->DecrementSuspendCount(self);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
     }
   }

@@ -386,9 +404,6 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
     Thread::resume_cond_->Broadcast(self);
   }

-  if (kIsDebugBuild && allowLockChecking) {
-    self->AllowPreMonitorMutexes();
-  }
   return count;
 }

@@ -517,14 +532,14 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
   // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
   // pause.
   const uint64_t suspend_start_time = NanoTime();
-  SuspendAllInternal(self);
+  SuspendAllInternal(self, self, nullptr);
   if (pause_listener != nullptr) {
     pause_listener->StartPause();
   }

   // Run the flip callback for the collector.
   Locks::mutator_lock_->ExclusiveLock(self);
-  suspend_all_histogram_.AdjustAndAddValue(NanoTime() - suspend_start_time);
+  suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
   flip_callback->Run(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
   collector->RegisterPause(NanoTime() - suspend_start_time);
@@ -555,7 +570,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
     if ((state == ThreadState::kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
         thread->GetSuspendCount() == 1) {
       // The thread will resume right after the broadcast.
-      thread->DecrementSuspendCount(self);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
       ++runnable_thread_count;
     } else {
       other_threads.push_back(thread);
@@ -584,7 +600,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
     TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
-      thread->DecrementSuspendCount(self);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
     }
     Thread::resume_cond_->Broadcast(self);
   }
@@ -592,32 +609,6 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
   return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
 }

-bool ThreadList::WaitForSuspendBarrier(AtomicInteger* barrier) {
-#if ART_USE_FUTEXES
-  timespec wait_timeout;
-  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
-#endif
-  while (true) {
-    int32_t cur_val = barrier->load(std::memory_order_acquire);
-    if (cur_val <= 0) {
-      CHECK_EQ(cur_val, 0);
-      return true;
-    }
-#if ART_USE_FUTEXES
-    if (futex(barrier->Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
-        != 0) {
-      if (errno == ETIMEDOUT) {
-        return false;
-      } else if (errno != EAGAIN && errno != EINTR) {
-        PLOG(FATAL) << "futex wait for suspend barrier failed";
-      }
-    }
-#endif
-    // Else spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on Linux,
-    // including all targets.
-  }
-}
-
 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
   Thread* self = Thread::Current();
@@ -630,7 +621,7 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
     ScopedTrace trace("Suspending mutator threads");
     const uint64_t start_time = NanoTime();

-    SuspendAllInternal(self);
+    SuspendAllInternal(self, self);
     // All threads are known to have suspended (but a thread may still own the mutator lock)
     // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
@@ -654,14 +645,14 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
     const uint64_t end_time = NanoTime();
     const uint64_t suspend_time = end_time - start_time;
-    suspend_all_histogram_.AdjustAndAddValue(suspend_time);
+    suspend_all_historam_.AdjustAndAddValue(suspend_time);
     if (suspend_time > kLongThreadSuspendThreshold) {
       LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
     }

     if (kDebugLocking) {
       // Debug check that all threads are suspended.
-      AssertOtherThreadsAreSuspended(self);
+      AssertThreadsAreSuspended(self, self);
     }
   }
   ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str());
@@ -674,8 +665,10 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
 }

 // Ensures all threads running Java suspend and that those not running Java don't start.
-void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
-  const uint64_t start_time = NanoTime();
+void ThreadList::SuspendAllInternal(Thread* self,
+                                    Thread* ignore1,
+                                    Thread* ignore2,
+                                    SuspendReason reason) {
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
@@ -693,77 +686,85 @@ void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
   // The atomic counter for number of threads that need to pass the barrier.
   AtomicInteger pending_threads;
+  uint32_t num_ignored = 0;
+  if (ignore1 != nullptr) {
+    ++num_ignored;
+  }
+  if (ignore2 != nullptr && ignore1 != ignore2) {
+    ++num_ignored;
+  }
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    // Update global suspend all state for attaching threads.
+    ++suspend_all_count_;
+    pending_threads.store(list_.size() - num_ignored, std::memory_order_relaxed);
+    // Increment everybody's suspend count (except those that should be ignored).
+    for (const auto& thread : list_) {
+      if (thread == ignore1 || thread == ignore2) {
+        continue;
+      }
+      VLOG(threads) << "requesting thread suspend: " << *thread;
+      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
+      DCHECK(updated);
+
+      // Must install the pending_threads counter first, then check thread->IsSuspended() and clear
+      // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
+      // that can lead a thread to miss a call to PassActiveSuspendBarriers().
+      if (thread->IsSuspended()) {
+        // Only clear the counter for the current thread.
+        thread->ClearSuspendBarrier(&pending_threads);
+        pending_threads.fetch_sub(1, std::memory_order_seq_cst);
+      }
+    }
+  }
+
+  // Wait for the barrier to be passed by all runnable threads. This wait
+  // is done with a timeout so that we can detect problems.
+#if ART_USE_FUTEXES
+  timespec wait_timeout;
+  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
+#endif
+  const uint64_t start_time = NanoTime();
   while (true) {
-    {
-      MutexLock mu(self, *Locks::thread_list_lock_);
-      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-      bool in_flip = false;
-      for (const auto& thread : list_) {
-        if (thread->GetFlipFunction() != nullptr) {
-          in_flip = true;
-          break;
+    int32_t cur_val = pending_threads.load(std::memory_order_relaxed);
+    if (LIKELY(cur_val > 0)) {
+#if ART_USE_FUTEXES
+      if (futex(pending_threads.Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
+          != 0) {
+        if ((errno == EAGAIN) || (errno == EINTR)) {
+          // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
+          continue;
         }
-      }
-      if (LIKELY(suspend_all_count_ == 0
-                 && (self == nullptr || self->GetSuspendCount() == 0)
-                 && !in_flip)) {
-        // The above condition remains valid while we hold thread_suspend_count_lock_ .
-        bool found_myself = false;
-        // Update global suspend all state for attaching threads.
-        ++suspend_all_count_;
-        pending_threads.store(list_.size() - (self == nullptr ? 0 : 1), std::memory_order_relaxed);
-        // Increment everybody else's suspend count.
-        for (const auto& thread : list_) {
-          if (thread == self) {
-            found_myself = true;
-          } else {
-            VLOG(threads) << "requesting thread suspend: " << *thread;
-            DCHECK_EQ(suspend_all_count_, 1);
-            thread->IncrementSuspendCount(self, &pending_threads, nullptr, reason);
-
-            // Must install the pending_threads counter first, then check thread->IsSuspended()
-            // and clear the counter. Otherwise there's a race with
-            // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call
-            // to PassActiveSuspendBarriers().
-            if (thread->IsSuspended()) {
-              // Effectively pass the barrier on behalf of the already suspended thread.
-              DCHECK_EQ(thread->tlsPtr_.active_suspendall_barrier, &pending_threads);
-              pending_threads.fetch_sub(1, std::memory_order_seq_cst);
-              thread->tlsPtr_.active_suspendall_barrier = nullptr;
-              if (!thread->HasActiveSuspendBarrier()) {
-                thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
-              }
+        if (errno == ETIMEDOUT) {
+          const uint64_t wait_time = NanoTime() - start_time;
+          MutexLock mu(self, *Locks::thread_list_lock_);
+          MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+          std::ostringstream oss;
+          for (const auto& thread : list_) {
+            if (thread == ignore1 || thread == ignore2) {
+              continue;
+            }
+            if (!thread->IsSuspended()) {
+              oss << std::endl << "Thread not suspended: " << *thread;
             }
           }
+          LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
+              << "Timed out waiting for threads to suspend, waited for "
+              << PrettyDuration(wait_time)
+              << oss.str();
+        } else {
+          PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
         }
-        DCHECK(self == nullptr || found_myself);
-        break;
-      }
-    }
-    // The if (LIKELY ...) condition above didn't hold. This is a bad time to initiate a suspend.
-    // Either the suspendall_barrier is already in use, or proceeding at this time risks deadlock.
-    // See b/31683379 for an explanation of the thread flip condition. Note that in the event of a
-    // competing Suspend or SuspendAll, we are about to be suspended anyway. We hold no locks,
-    // so it's safe to sleep and retry.
-    VLOG(threads) << "SuspendAll is sleeping";
-    usleep(kThreadSuspendSleepUs);
-    // We're already not runnable, so an attempt to suspend us should succeed.
-  }
-
-  if (!WaitForSuspendBarrier(&pending_threads)) {
-    const uint64_t wait_time = NanoTime() - start_time;
-    MutexLock mu(self, *Locks::thread_list_lock_);
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-    std::ostringstream oss;
-    for (const auto& thread : list_) {
-      if (thread != self && !thread->IsSuspended()) {
-        oss << std::endl << "Thread not suspended: " << *thread;
-      }
+      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
+#else
+      // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
+      UNUSED(start_time);
+#endif
+    } else {
+      CHECK_EQ(cur_val, 0);
+      break;
     }
-    LOG(::android::base::FATAL)
-        << "Timed out waiting for threads to suspend, waited for "
-        << PrettyDuration(wait_time)
-        << oss.str();
   }
 }

@@ -782,7 +783,7 @@ void ThreadList::ResumeAll() {

   if (kDebugLocking) {
     // Debug check that all threads are suspended.
-    AssertOtherThreadsAreSuspended(self);
+    AssertThreadsAreSuspended(self, self);
   }

   long_suspend_ = false;
@@ -795,9 +796,11 @@ void ThreadList::ResumeAll() {
     --suspend_all_count_;
     // Decrement the suspend counts for all threads.
     for (const auto& thread : list_) {
-      if (thread != self) {
-        thread->DecrementSuspendCount(self);
+      if (thread == self) {
+        continue;
       }
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
     }

     // Broadcast a notification to all suspended threads, some or all of
@@ -842,7 +845,11 @@ bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
                  << ") thread not within thread list";
       return false;
     }
-    thread->DecrementSuspendCount(self, reason);
+    if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
+      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
+                 << ") could not modify suspend count.";
+      return false;
+    }
   }

   {
@@ -872,19 +879,40 @@ static void ThreadSuspendByPeerWarning(Thread* self,
   }
 }

-Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) {
-  bool is_suspended = false;
+Thread* ThreadList::SuspendThreadByPeer(jobject peer,
+                                        SuspendReason reason,
+                                        bool* timed_out) {
+  bool request_suspension = true;
+  const uint64_t start_time = NanoTime();
+  int self_suspend_count = 0;
+  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
+  *timed_out = false;
   Thread* const self = Thread::Current();
+  Thread* suspended_thread = nullptr;
   VLOG(threads) << "SuspendThreadByPeer starting";
-  Thread* thread;
-  WrappedSuspend1Barrier wrapped_barrier{};
   while (true) {
+    Thread* thread;
     {
-      // Note: this will transition to runnable and potentially suspend.
+      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
+      // is requesting another suspend, to avoid deadlock, by requiring this function be called
+      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend rather
+      // than request thread suspension, to avoid potential cycles in threads requesting each other
+      // suspend.
       ScopedObjectAccess soa(self);
       MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
       thread = Thread::FromManagedThread(soa, peer);
       if (thread == nullptr) {
+        if (suspended_thread != nullptr) {
+          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
+          // If we incremented the suspend count but the thread reset its peer, we need to
+          // re-decrement it since it is shutting down and may deadlock the runtime in
+          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
+          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
+                                                              -1,
+                                                              nullptr,
+                                                              reason);
+          DCHECK(updated);
+        }
         ThreadSuspendByPeerWarning(self,
                                    ::android::base::WARNING,
                                    "No such thread for suspend",
                                    peer);
         return nullptr;
       }
       if (!Contains(thread)) {
+        CHECK(suspended_thread == nullptr);
         VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
             << reinterpret_cast<void*>(thread);
         return nullptr;
       }
-      // IsSuspended on the current thread will fail as the current thread is changed into
-      // Runnable above. As the suspend count is now raised if this is the current thread
-      // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
-      // to just explicitly handle the current thread in the callers to this code.
-      CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
       VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
       {
         MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
-        if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
-          thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
-          if (thread->IsSuspended()) {
-            // See the discussion in mutator_gc_coord.md for the race here.
-            thread->RemoveFirstSuspend1Barrier();
-            if (!thread->HasActiveSuspendBarrier()) {
-              thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
-            }
-            is_suspended = true;
+        if (request_suspension) {
+          if (self->GetSuspendCount() > 0) {
+            // We hold the suspend count lock but another thread is trying to suspend us. It's not
+            // safe to try to suspend another thread in case we get a cycle. Start the loop again
+            // which will allow this thread to be suspended.
+            ++self_suspend_count;
+            continue;
           }
-          DCHECK_GT(thread->GetSuspendCount(), 0);
-          break;
+          CHECK(suspended_thread == nullptr);
+          suspended_thread = thread;
+          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
+          DCHECK(updated);
+          request_suspension = false;
+        } else {
+          // If the caller isn't requesting suspension, a suspension should have already occurred.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
+          if (ATraceEnabled()) {
+            std::string name;
+            thread->GetThreadName(name);
+            ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
+                                     peer).c_str());
+          }
+          return thread;
+        }
+        const uint64_t total_delay = NanoTime() - start_time;
+        if (total_delay >= thread_suspend_timeout_ns_) {
+          if (suspended_thread == nullptr) {
+            ThreadSuspendByPeerWarning(self,
+                                       ::android::base::FATAL,
+                                       "Failed to issue suspend request",
+                                       peer);
+          } else {
+            CHECK_EQ(suspended_thread, thread);
+            LOG(WARNING) << "Suspended thread state_and_flags: "
+                         << suspended_thread->StateAndFlagsAsHexString()
+                         << ", self_suspend_count = " << self_suspend_count;
+            // Explicitly release thread_suspend_count_lock_; we haven't held it for long, so
+            // seeing threads blocked on it is not informative.
+            Locks::thread_suspend_count_lock_->Unlock(self);
+            ThreadSuspendByPeerWarning(self,
+                                       ::android::base::FATAL,
+                                       "Thread suspension timed out",
+                                       peer);
+          }
+          UNREACHABLE();
+        } else if (sleep_us == 0 &&
+            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
+          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
+          // excessive CPU usage.
+          sleep_us = kThreadSuspendMaxYieldUs / 2;
         }
-        // Else we either hold the suspend count lock but another thread is trying to suspend us,
-        // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
-        // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
-        // case, start the loop again, which will allow this thread to be suspended.
       }
+      // Release locks and come out of runnable state.
     }
-    // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
-    VLOG(threads) << "SuspendThreadByPeer is sleeping";
-    usleep(kThreadSuspendSleepUs);
-  }
-  // Now wait for target to decrement suspend barrier.
-  if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
-    // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
-    VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
-    if (ATraceEnabled()) {
-      std::string name;
-      thread->GetThreadName(name);
-      ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
-                               peer).c_str());
-    }
-    return thread;
-  } else {
-    LOG(WARNING) << "Suspended thread state_and_flags: "
-                 << thread->StateAndFlagsAsHexString();
-    // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe
-    // without additional cleanup.
-    ThreadSuspendByPeerWarning(self,
-                               ::android::base::FATAL,
-                               "SuspendThreadByPeer timed out",
-                               peer);
-    UNREACHABLE();
+    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
+    ThreadSuspendSleep(sleep_us);
+    // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at
+    // all if possible. This shouldn't be an issue since time to suspend should always be small.
+    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
   }
 }

 static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
                                            const char* message,
                                            uint32_t thread_id) {
   LOG(severity) << StringPrintf("%s: %d", message, thread_id);
 }

-Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason) {
-  bool is_suspended = false;
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
+                                            SuspendReason reason,
+                                            bool* timed_out) {
+  const uint64_t start_time = NanoTime();
+  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
+  *timed_out = false;
+  Thread* suspended_thread = nullptr;
   Thread* const self = Thread::Current();
   CHECK_NE(thread_id, kInvalidThreadId);
   VLOG(threads) << "SuspendThreadByThreadId starting";
-  Thread* thread;
-  WrappedSuspend1Barrier wrapped_barrier{};
   while (true) {
     {
-      // Note: this will transition to runnable and potentially suspend.
+      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
+      // is requesting another suspend, to avoid deadlock, by requiring this function be called
+      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend rather
+      // than request thread suspension, to avoid potential cycles in threads requesting each other
+      // suspend.
       ScopedObjectAccess soa(self);
       MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
-      thread = FindThreadByThreadId(thread_id);
+      Thread* thread = nullptr;
+      for (const auto& it : list_) {
+        if (it->GetThreadId() == thread_id) {
+          thread = it;
+          break;
+        }
+      }
       if (thread == nullptr) {
+        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
+            << " no longer in thread list";
         // There's a race in inflating a lock and the owner giving up ownership and then dying.
         ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                        "No such thread id for suspend",
                                        thread_id);
         return nullptr;
       }
-      DCHECK(Contains(thread));
-      CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
       VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
+      DCHECK(Contains(thread));
       {
         MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
-        if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
-          thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
-          if (thread->IsSuspended()) {
-            // See the discussion in mutator_gc_coord.md for the race here.
-            thread->RemoveFirstSuspend1Barrier();
-            if (!thread->HasActiveSuspendBarrier()) {
-              thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
-            }
-            is_suspended = true;
+        if (suspended_thread == nullptr) {
+          if (self->GetSuspendCount() > 0) {
+            // We hold the suspend count lock but another thread is trying to suspend us. It's not
+            // safe to try to suspend another thread in case we get a cycle. Start the loop again
+            // which will allow this thread to be suspended.
+            continue;
           }
-          DCHECK_GT(thread->GetSuspendCount(), 0);
-          break;
+          bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
+          DCHECK(updated);
+          suspended_thread = thread;
+        } else {
+          CHECK_EQ(suspended_thread, thread);
+          // If the caller isn't requesting suspension, a suspension should have already occurred.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          if (ATraceEnabled()) {
+            std::string name;
+            thread->GetThreadName(name);
+            ATraceBegin(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
+                                     name.c_str(), thread_id).c_str());
+          }
+          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
+          return thread;
+        }
+        const uint64_t total_delay = NanoTime() - start_time;
+        if (total_delay >= thread_suspend_timeout_ns_) {
+          ThreadSuspendByThreadIdWarning(::android::base::WARNING,
+                                         "Thread suspension timed out",
+                                         thread_id);
+          if (suspended_thread != nullptr) {
+            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
+            DCHECK(updated);
+          }
+          *timed_out = true;
+          return nullptr;
+        } else if (sleep_us == 0 &&
+            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
+          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
+          // excessive CPU usage.
+          sleep_us = kThreadSuspendMaxYieldUs / 2;
         }
-        // Else we either hold the suspend count lock but another thread is trying to suspend us,
-        // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
-        // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
-        // case, start the loop again, which will allow this thread to be suspended.
       }
+      // Release locks and come out of runnable state.
     }
-    // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
-    VLOG(threads) << "SuspendThreadByThreadId is sleeping";
-    usleep(kThreadSuspendSleepUs);
-  }
-  // Now wait for target to decrement suspend barrier.
-  if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
-    // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
-    VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
-    if (ATraceEnabled()) {
-      std::string name;
-      thread->GetThreadName(name);
-      ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for id=%d",
-                               name.c_str(), thread_id).c_str());
-    }
-    return thread;
-  } else {
-    // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe without
-    // additional cleanup.
-    ThreadSuspendByThreadIdWarning(::android::base::FATAL,
-                                   "SuspendThreadByThreadId timed out",
-                                   thread_id);
-    UNREACHABLE();
+    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
+    ThreadSuspendSleep(sleep_us);
+    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
   }
 }

@@ -1099,7 +1177,8 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() {
     // daemons.
     CHECK(thread->IsDaemon()) << *thread;
     if (thread != self) {
-      thread->IncrementSuspendCount(self);
+      bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
      ++daemons_left;
     }
     // We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
@@ -1199,10 +1278,11 @@ void ThreadList::Register(Thread* self) {
   // SuspendAll requests.
   MutexLock mu(self, *Locks::thread_list_lock_);
   MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-  if (suspend_all_count_ == 1) {
-    self->IncrementSuspendCount(self);
-  } else {
-    DCHECK_EQ(suspend_all_count_, 0);
+  // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
+  // this isn't particularly efficient, the suspend counts are most commonly 0 or 1.
+  for (int delta = suspend_all_count_; delta > 0; delta--) {
+    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+    DCHECK(updated);
   }
   CHECK(!Contains(self));
   list_.push_back(self);
@@ -1308,11 +1388,13 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (Thread* thread : list_) {
-      thread->IncrementSuspendCount(self);
+      bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+      DCHECK(suspended);
       if (thread == self || thread->IsSuspended()) {
         threads_to_visit.push_back(thread);
       } else {
-        thread->DecrementSuspendCount(self);
+        bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+        DCHECK(resumed);
       }
     }
   }
@@ -1327,7 +1409,8 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
   {
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (Thread* thread : threads_to_visit) {
-      thread->DecrementSuspendCount(self);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+      DCHECK(updated);
     }
   }
 }
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 18f173e434..17a53fa58d 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -70,7 +70,7 @@ class ThreadList {
   bool Resume(Thread* thread, SuspendReason reason = SuspendReason::kInternal)
       REQUIRES(!Locks::thread_suspend_count_lock_) WARN_UNUSED;

-  // Suspends all other threads and gets exclusive access to the mutator lock.
+  // Suspends all threads and gets exclusive access to the mutator lock.
   // If long_suspend is true, then other threads who try to suspend will never timeout.
   // long_suspend is currently used for hprof since large heaps take a long time.
   void SuspendAll(const char* cause, bool long_suspend = false)
@@ -81,8 +81,10 @@ class ThreadList {
   // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
   // else null. The peer is used to identify the thread to avoid races with the thread terminating.
+  // If the suspension times out then *timed_out is set to true.
   Thread* SuspendThreadByPeer(jobject peer,
-                              SuspendReason reason)
+                              SuspendReason reason,
+                              bool* timed_out)
       REQUIRES(!Locks::mutator_lock_,
                !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);

@@ -90,8 +92,8 @@ class ThreadList {
   // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
   // thread on success else null. The thread id is used to identify the thread to avoid races with
   // the thread terminating. Note that as thread ids are recycled this may not suspend the expected
-  // thread, that may be terminating.
-  Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason)
+  // thread, that may be terminating. If the suspension times out then *timed_out is set to true.
+  Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason, bool* timed_out)
       REQUIRES(!Locks::mutator_lock_,
                !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
@@ -111,21 +113,11 @@ class ThreadList {
   // Running threads are not suspended but run the checkpoint inside of the suspend check. The
   // return value includes already suspended threads for b/24191051. Runs or requests the
   // callback, if non-null, inside the thread_list_lock critical section after determining the
-  // runnable/suspended states of the threads. Does not wait for completion of the checkpoint
-  // function in running threads. If the caller holds the mutator lock, then all instances of the
-  // checkpoint function are run with the mutator lock.
-  size_t RunCheckpoint(Closure* checkpoint_function,
-                       Closure* callback = nullptr,
-                       bool allowLockChecking = true)
+  // runnable/suspended states of the threads. Does not wait for completion of the callbacks in
+  // running threads.
+  size_t RunCheckpoint(Closure* checkpoint_function, Closure* callback = nullptr)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

-  // Convenience version of the above to disable lock checking inside Run function. Hopefully this
-  // and the third parameter above will eventually disappear.
-  size_t RunCheckpointUnchecked(Closure* checkpoint_function, Closure* callback = nullptr)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_) {
-    return RunCheckpoint(checkpoint_function, callback, false);
-  }
-
   // Run an empty checkpoint on threads. Wait until threads pass the next suspend point or are
   // suspended. This is used to ensure that the threads finish or aren't in the middle of an
   // in-flight mutator heap access (eg. a read barrier.) Runnable threads will respond by
@@ -134,8 +126,8 @@ class ThreadList {
   void RunEmptyCheckpoint()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

-  // Flip thread roots from from-space refs to to-space refs. Used by the concurrent copying
-  // collector. Returns the total number of threads for which the visitor was run.
+  // Flip thread roots from from-space refs to to-space refs. Used by
+  // the concurrent copying collector.
   size_t FlipThreadRoots(Closure* thread_flip_visitor,
                          Closure* flip_callback,
                          gc::collector::GarbageCollector* collector,
@@ -198,6 +190,9 @@ class ThreadList {
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);

+  size_t RunCheckpoint(Closure* checkpoint_function, bool includeSuspended)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+
   void DumpUnattachedThreads(std::ostream& os, bool dump_native_stack)
       REQUIRES(!Locks::thread_list_lock_);

@@ -205,15 +200,12 @@
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

   void SuspendAllInternal(Thread* self,
+                          Thread* ignore1,
+                          Thread* ignore2 = nullptr,
                           SuspendReason reason = SuspendReason::kInternal)
-      REQUIRES(!Locks::thread_list_lock_,
-               !Locks::thread_suspend_count_lock_,
-               !Locks::mutator_lock_);
-
-  // Wait for suspend barrier to reach zero. Return false on timeout.
-  bool WaitForSuspendBarrier(AtomicInteger* barrier);
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

-  void AssertOtherThreadsAreSuspended(Thread* self)
+  void AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2 = nullptr)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

   std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);
@@ -222,7 +214,6 @@ class ThreadList {
   std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);

   // Ongoing suspend all requests, used to ensure threads added to list_ respect SuspendAll.
-  // The value is always either 0 or 1.
   int suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_);

   // Number of threads unregistering, ~ThreadList blocks until this hits 0.
@@ -230,7 +221,7 @@ class ThreadList {

   // Thread suspend time histogram. Only modified when all the threads are suspended, so guarding
   // by mutator lock ensures no thread can read when another thread is modifying it.
-  Histogram<uint64_t> suspend_all_histogram_ GUARDED_BY(Locks::mutator_lock_);
+  Histogram<uint64_t> suspend_all_historam_ GUARDED_BY(Locks::mutator_lock_);

   // Whether or not the current thread suspension is long.
   bool long_suspend_;
diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
index 493f1d4073..78bb772893 100644
--- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
+++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
@@ -81,8 +81,10 @@ extern "C" JNIEXPORT void JNICALL Java_Main_waitAndInstrumentStack(JNIEnv*,
                                                                    jobject target) {
   while (!instrument_waiting) {
   }
+  bool timed_out = false;
   Thread* other = Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
-      target, SuspendReason::kInternal);
+      target, SuspendReason::kInternal, &timed_out);
+  CHECK(!timed_out);
   CHECK(other != nullptr);
   ScopedSuspendAll ssa(__FUNCTION__);
   Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other,
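With the restored three-argument interface, every caller handles the timeout case itself, as the test above does. A hedged sketch of the general calling pattern (the error handling here is illustrative, not part of this change):

    // Illustrative caller: distinguishes "no such thread" (nullptr, !timed_out)
    // from "suspension timed out" (nullptr, timed_out), and pairs the suspend
    // with a Resume() once the inspection is done.
    bool SuspendAndInspect(jobject peer) {
      bool timed_out = false;
      Thread* thread = Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
          peer, SuspendReason::kInternal, &timed_out);
      if (thread == nullptr) {
        return false;  // Thread exited, or (if timed_out) never reached a suspend point.
      }
      // ... inspect the suspended thread here ...
      return Runtime::Current()->GetThreadList()->Resume(thread, SuspendReason::kInternal);
    }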