path: root/runtime
author Hans Boehm <hboehm@google.com> 2022-09-10 00:08:42 +0000
committer Hans Boehm <hboehm@google.com> 2022-09-10 00:08:42 +0000
commit 7c8835df16147b9096dd4c9380ab4b5f700ea17d (patch)
tree ce0f48061b3ba7d5dc1da4d0dda78520f2629d4a /runtime
parent 7c39c86b17c91e651de1fcc0876fe5565e3f5204 (diff)
Revert "Thread suspension cleanup and deadlock fix"
This reverts commit 7c39c86b17c91e651de1fcc0876fe5565e3f5204.

Reason for revert: We're seeing a number of new, somewhat rare, timeouts on multiple tests.

Change-Id: Ida9a4f80b64b6fedc16db176a8df9c2e985ef482
Diffstat (limited to 'runtime')
-rw-r--r--  runtime/cha.cc  2
-rw-r--r--  runtime/entrypoints_order_test.cc  12
-rw-r--r--  runtime/monitor.cc  5
-rw-r--r--  runtime/mutator_gc_coord.md  71
-rw-r--r--  runtime/native/dalvik_system_VMStack.cc  8
-rw-r--r--  runtime/native/java_lang_Thread.cc  8
-rw-r--r--  runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc  10
-rw-r--r--  runtime/oat.h  4
-rw-r--r--  runtime/thread-inl.h  121
-rw-r--r--  runtime/thread.cc  317
-rw-r--r--  runtime/thread.h  198
-rw-r--r--  runtime/thread_list.cc  527
-rw-r--r--  runtime/thread_list.h  29
13 files changed, 668 insertions, 644 deletions
diff --git a/runtime/cha.cc b/runtime/cha.cc
index 0c032aa125..d19d4f6e30 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -241,7 +241,7 @@ class CHACheckpoint final : public Closure {
// Note thread and self may not be equal if thread was already suspended at
// the point of the request.
Thread* self = Thread::Current();
- Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+ ScopedObjectAccess soa(self);
CHAStackVisitor visitor(thread, nullptr, method_headers_);
visitor.WalkStack();
barrier_.Pass(self);
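
The hunk above sits inside a checkpoint closure: another thread asks the target to run a piece of work, and the closure may be executed on the target's behalf, which is why `thread` and `self` can differ. A minimal sketch of that closure shape, using simplified stand-in types rather than the real ART `Thread`, `Closure`, or barrier classes:

```cpp
#include <iostream>
#include <string>

// Simplified stand-ins for ART's Thread and Closure; illustration only.
struct Thread {
  std::string name;
  static Thread* Current();  // the thread executing this code
};

namespace {
thread_local Thread gCurrentThread{"requesting-thread"};
}
Thread* Thread::Current() { return &gCurrentThread; }

struct Closure {
  virtual ~Closure() = default;
  virtual void Run(Thread* target) = 0;
};

// A checkpoint runs *on behalf of* `target`: if the target was already
// suspended when the checkpoint was requested, another thread executes Run(),
// so Thread::Current() need not equal `target`.
class DumpNameCheckpoint final : public Closure {
 public:
  void Run(Thread* target) override {
    Thread* self = Thread::Current();
    std::cout << "checkpoint for " << target->name
              << " executed by " << self->name << "\n";
    // Real code would walk target's stack here and then pass a barrier so
    // the requester knows this checkpoint completed.
  }
};

int main() {
  Thread suspended_target{"blocked-thread"};
  DumpNameCheckpoint checkpoint;
  checkpoint.Run(&suspended_target);  // Run by the current thread on the target's behalf.
  return 0;
}
```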
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 8821aed259..2cd58dbf1b 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -111,15 +111,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspendall_barrier,
- sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspendall_barrier, active_suspend1_barriers,
- sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend1_barriers, thread_local_pos,
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, thread_local_start,
+ sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_limit, sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_limit, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_limit, thread_local_objects, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, jni_entrypoints, sizeof(size_t));
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 0cd05485e6..4e64c95b8c 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1065,10 +1065,13 @@ void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWo
ThreadList* thread_list = Runtime::Current()->GetThreadList();
// Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
self->SetMonitorEnterObject(obj.Get());
+ bool timed_out;
Thread* owner;
{
ScopedThreadSuspension sts(self, ThreadState::kWaitingForLockInflation);
- owner = thread_list->SuspendThreadByThreadId(owner_thread_id, SuspendReason::kInternal);
+ owner = thread_list->SuspendThreadByThreadId(owner_thread_id,
+ SuspendReason::kInternal,
+ &timed_out);
}
if (owner != nullptr) {
// We succeeded in suspending the thread, check the lock's status didn't change.
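
The signature restored here reports suspension timeouts through an out-parameter, and the caller checks it when the returned `Thread*` is null; the same pattern reappears in the VMStack, java_lang_Thread, and DdmVmInternal hunks below. A small sketch of the caller-side pattern, with hypothetical stand-in types (`FakeThreadList` is not an ART class):

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-ins for the ART types involved; illustration only.
struct Thread {};
enum class SuspendReason { kInternal };

struct FakeThreadList {
  // Mirrors the restored signature: a null result with *timed_out == true
  // means the target never acknowledged the suspend request in time.
  Thread* SuspendThreadByThreadId(uint32_t /*tid*/, SuspendReason, bool* timed_out) {
    *timed_out = true;  // Pretend the target failed to suspend.
    return nullptr;
  }
  bool Resume(Thread* /*thread*/, SuspendReason) { return true; }
};

// Caller pattern used by Monitor::InflateThinLocked and the JNI call sites:
// suspend, act while the owner is suspended, resume; otherwise report timeout.
void InflateLockedObject(FakeThreadList& list, uint32_t owner_tid) {
  bool timed_out = false;
  Thread* owner =
      list.SuspendThreadByThreadId(owner_tid, SuspendReason::kInternal, &timed_out);
  if (owner != nullptr) {
    // ... inflate the thin lock while the owner cannot run ...
    bool resumed = list.Resume(owner, SuspendReason::kInternal);
    (void)resumed;
  } else if (timed_out) {
    std::cerr << "owner failed to suspend within the timeout\n";
  }
}

int main() {
  FakeThreadList list;
  InflateLockedObject(list, 42u);
  return 0;
}
```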
diff --git a/runtime/mutator_gc_coord.md b/runtime/mutator_gc_coord.md
index e724de4a76..b3635c558a 100644
--- a/runtime/mutator_gc_coord.md
+++ b/runtime/mutator_gc_coord.md
@@ -217,77 +217,6 @@ processing, after acquiring `reference_processor_lock_`. This means that empty
checkpoints do not preclude client threads from being in the middle of an
operation that involves a weak reference access, while nonempty checkpoints do.
-Thread Suspension Mechanics
----------------------------
-
-Thread suspension is initiated by a registered thread, except that, for testing
-purposes, `SuspendAll` may be invoked with `self == nullptr`. We never suspend
-the initiating thread, explicitly excluding it from `SuspendAll()`, and failing
-`SuspendThreadBy...()` requests to that effect.
-
-The suspend calls invoke `IncrementSuspendCount()` to increment the thread
-suspend count for each thread. That adds a "suspend barrier" (atomic counter) to
-the per-thread list of such counters to decrement. It normally sets the
-`kSuspendRequest` ("should enter safepoint handler") and `kActiveSuspendBarrier`
-("need to notify us when suspended") flags.
-
-After setting these two flags, we check whether the thread is suspended and
-`kSuspendRequest` is still set. Since the thread is already suspended, it cannot
-be expected to respond to "pass the suspend barrier" (decrement the atomic
-counter) in a timely fashion. Hence we do so on its behalf. This decrements
-the "barrier" and removes it from the thread's list of barriers to decrement,
-and clears `kActiveSuspendBarrier`. `kSuspendRequest` remains to ensure the
-thread doesn't prematurely return to runnable state.
-
-If `SuspendAllInternal()` does not immediately see a suspended state, then it is up
-to the target thread to decrement the suspend barrier.
-`TransitionFromRunnableToSuspended()` calls
-`TransitionToSuspendedAndRunCheckpoints()`, which changes the thread state, and
-then calls `CheckActiveSuspendBarriers()` to check for the
-`kActiveSuspendBarrier` flag and decrement the suspend barrier if set.
-
-The `suspend_count_lock_` is not consistently held in the target thread
-during this process. Thus correctness in resolving the race between a
-suspension-requesting thread and a target thread voluntarily suspending relies
-on the following: If the requesting thread sets the flags with
-`kActiveSuspendBarrier` before the target's state is changed to suspended, then
-the target thread will see `kActiveSuspendBarrier` set, and will attempt to
-handle the barrier. If, on the other hand, the target thread changes the thread
-state first, then the requesting thread will see the suspended state, and handle
-the barrier itself. Since the actual update of suspend counts and suspend
-barrier data structures is done under the `suspend_count_lock_`, we always
-ensure that either the requestor removes/clears the barrier for a given target,
-or the target thread(s) decrement the barrier, but not both. This also ensures
-that the barrier cannot be decremented after the stack frame holding the barrier
-goes away.
-
-This relies on the fact that the two stores in the two threads to the state and
-kActiveSuspendBarrier flag are ordered with respect to the later loads. That's
-guaranteed, since they are all stored in a single `atomic<>`. Thus even relaxed
-accesses are OK.
-
-The actual suspend barrier representation still varies between `SuspendAll()`
-and `SuspendThreadBy...()`. The former relies on the fact that only one such
-barrier can be in use at a time, while the latter maintains a linked list of
-active suspend barriers for each target thread, relying on the fact that each
-one can appear on the list of only one thread, and we can thus use list nodes
-allocated in the stack frames of requesting threads.
-
-**Avoiding suspension cycles**
-
-Any thread can issue a `SuspendThreadByPeer()` or `SuspendAll()` request. But if
-Thread A increments Thread B's suspend count while Thread B increments Thread
-A's suspend count, and they then both suspend during a subsequent thread
-transition, we're deadlocked.
-
-For `SuspendAll()`, we enforce a requirement that at most one `SuspendAll()`
-request is running at one time. In addition, in all cases, we refuse to initiate
-a suspend request from a registered thread that is also being asked to suspend
-(i.e. the suspend count is nonzero). Instead the requestor waits for that
-condition to change. This means that we cannot create a cycle in which each
-thread has asked to suspend the next one, and thus no thread can progress. The
-required atomicity of the requestor suspend count check with setting the suspend
-count of the target(s) is ensured by holding `suspend_count_lock_`.
[^1]: Some comments in the code refer to a not-yet-really-implemented scheme in
which the compiler-generated code would load through the address at
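
The removed text describes suspend barriers as atomic counters: the requester initializes a counter and waits for it to reach zero, while each target (or the requester acting for an already-suspended target) decrements it. A self-contained sketch of that idea in standard C++ (the `SuspendBarrier` class here is illustrative, not ART's implementation, which waits on a futex with a timeout):

```cpp
#include <atomic>
#include <chrono>
#include <thread>
#include <vector>

// Illustrative stand-in for the suspend barrier described above: the
// requester initializes an atomic counter to the number of target threads
// and waits for it to reach zero; each target decrements it once it is
// known to be suspended.
class SuspendBarrier {
 public:
  explicit SuspendBarrier(int targets) : pending_(targets) {}

  // Target side: acknowledge the suspend request.
  void Pass() { pending_.fetch_sub(1, std::memory_order_release); }

  // Requester side: ART blocks on a futex with a timeout here; this sketch
  // simply yields until every target has passed.
  void Await() {
    while (pending_.load(std::memory_order_acquire) > 0) {
      std::this_thread::yield();
    }
  }

 private:
  std::atomic<int> pending_;
};

int main() {
  constexpr int kTargets = 4;
  SuspendBarrier barrier(kTargets);
  std::vector<std::thread> targets;
  for (int i = 0; i < kTargets; ++i) {
    targets.emplace_back([&barrier] {
      // Simulate reaching a safepoint before acknowledging the request.
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
      barrier.Pass();
    });
  }
  barrier.Await();  // All targets have acknowledged suspension.
  for (auto& t : targets) {
    t.join();
  }
  return 0;
}
```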
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 7b21de204d..71078c9ad2 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -57,7 +57,10 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa,
// Suspend thread to build stack trace.
ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
ThreadList* thread_list = Runtime::Current()->GetThreadList();
- Thread* thread = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal);
+ bool timed_out;
+ Thread* thread = thread_list->SuspendThreadByPeer(peer,
+ SuspendReason::kInternal,
+ &timed_out);
if (thread != nullptr) {
// Must be runnable to create returned array.
{
@@ -67,6 +70,9 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa,
// Restart suspended thread.
bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
DCHECK(resumed);
+ } else if (timed_out) {
+ LOG(ERROR) << "Trying to get thread's stack failed as the thread failed to suspend within a "
+ "generous timeout.";
}
}
return trace;
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index fd67a0a7fa..570c554782 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -147,8 +147,11 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
// thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
// in the DDMS send code.
ThreadList* thread_list = Runtime::Current()->GetThreadList();
+ bool timed_out;
// Take suspend thread lock to avoid races with threads trying to suspend this one.
- Thread* thread = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal);
+ Thread* thread = thread_list->SuspendThreadByPeer(peer,
+ SuspendReason::kInternal,
+ &timed_out);
if (thread != nullptr) {
{
ScopedObjectAccess soa(env);
@@ -156,6 +159,9 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
}
bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
DCHECK(resumed);
+ } else if (timed_out) {
+ LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
+ "failed to suspend within a generous timeout.";
}
}
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index f20cd281e8..081ec2043a 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -59,6 +59,7 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
trace = Thread::InternalStackTraceToStackTraceElementArray(soa, internal_trace);
} else {
ThreadList* thread_list = Runtime::Current()->GetThreadList();
+ bool timed_out;
// Check for valid thread
if (thin_lock_id == ThreadList::kInvalidThreadId) {
@@ -66,7 +67,9 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
}
// Suspend thread to build stack trace.
- Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, SuspendReason::kInternal);
+ Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id,
+ SuspendReason::kInternal,
+ &timed_out);
if (thread != nullptr) {
{
ScopedObjectAccess soa(env);
@@ -76,6 +79,11 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
// Restart suspended thread.
bool resumed = thread_list->Resume(thread, SuspendReason::kInternal);
DCHECK(resumed);
+ } else {
+ if (timed_out) {
+ LOG(ERROR) << "Trying to get thread's stack by id failed as the thread failed to suspend "
+ "within a generous timeout.";
+ }
}
}
return trace;
diff --git a/runtime/oat.h b/runtime/oat.h
index 99cc4e9087..341e70bbe9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Change suspend barrier data structure.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '2', '8', '\0' } };
+ // Last oat version changed reason: Don't use instrumentation stubs for native methods.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '2', '7', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 5d45c599fe..4110ed2851 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -27,6 +27,7 @@
#include "jni/jni_env_ext.h"
#include "managed_stack-inl.h"
#include "obj_ptr.h"
+#include "suspend_reason.h"
#include "thread-current-inl.h"
#include "thread_pool.h"
@@ -221,7 +222,7 @@ inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state
}
}
-inline void Thread::CheckActiveSuspendBarriers() {
+inline void Thread::PassActiveSuspendBarriers() {
while (true) {
StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
if (LIKELY(!state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) &&
@@ -229,7 +230,7 @@ inline void Thread::CheckActiveSuspendBarriers() {
!state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier))) {
break;
} else if (state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) {
- PassActiveSuspendBarriers();
+ PassActiveSuspendBarriers(this);
} else {
// Impossible
LOG(FATAL) << "Fatal, thread transitioned into suspended without running the checkpoint";
@@ -237,19 +238,6 @@ inline void Thread::CheckActiveSuspendBarriers() {
}
}
-inline void Thread::AddSuspend1Barrier(WrappedSuspend1Barrier* suspend1_barrier) {
- suspend1_barrier->next_ = tlsPtr_.active_suspend1_barriers;
- tlsPtr_.active_suspend1_barriers = suspend1_barrier;
-}
-
-inline void Thread::RemoveFirstSuspend1Barrier() {
- tlsPtr_.active_suspend1_barriers = tlsPtr_.active_suspend1_barriers->next_;
-}
-
-inline bool Thread::HasActiveSuspendBarrier() {
- return tlsPtr_.active_suspend1_barriers != nullptr || tlsPtr_.active_suspendall_barrier != nullptr;
-}
-
inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
// Note: JNI stubs inline a fast path of this method that transitions to suspended if
// there are no flags set and then clears the `held_mutexes[kMutatorLock]` (this comes
@@ -265,7 +253,7 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
// Mark the release of the share of the mutator lock.
GetMutatorLock()->TransitionFromRunnableToSuspended(this);
// Once suspended - check the active suspend barrier flag
- CheckActiveSuspendBarriers();
+ PassActiveSuspendBarriers();
}
inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
@@ -296,7 +284,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
break;
}
} else if (old_state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) {
- PassActiveSuspendBarriers();
+ PassActiveSuspendBarriers(this);
} else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) ||
old_state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest))) {
// Checkpoint flags should not be set while in suspended state.
@@ -436,80 +424,35 @@ inline void Thread::PoisonObjectPointersIfDebug() {
}
}
-inline void Thread::IncrementSuspendCount(Thread* self,
- AtomicInteger* suspendall_barrier,
- WrappedSuspend1Barrier* suspend1_barrier,
- SuspendReason reason) {
- if (kIsDebugBuild) {
- Locks::thread_suspend_count_lock_->AssertHeld(self);
- if (this != self && !IsSuspended()) {
- Locks::thread_list_lock_->AssertHeld(self);
- }
- }
- if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
- Locks::user_code_suspension_lock_->AssertHeld(self);
- }
-
- // Avoid deadlocks during a thread flip. b/31683379. This condition is not necessary for
- // RunCheckpoint, but it does not use a suspend barrier.
- DCHECK(!gUseReadBarrier
- || this == self
- || (suspendall_barrier == nullptr && suspend1_barrier == nullptr)
- || GetFlipFunction() == nullptr);
-
- uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest);
- if (suspendall_barrier != nullptr) {
- DCHECK(suspend1_barrier == nullptr);
- DCHECK(tlsPtr_.active_suspendall_barrier == nullptr);
- tlsPtr_.active_suspendall_barrier = suspendall_barrier;
- flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
- } else if (suspend1_barrier != nullptr) {
- AddSuspend1Barrier(suspend1_barrier);
- flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
- }
-
- ++tls32_.suspend_count;
- if (reason == SuspendReason::kForUserCode) {
- ++tls32_.user_code_suspend_count;
- }
-
- // Two bits might be set simultaneously.
- tls32_.state_and_flags.fetch_or(flags, std::memory_order_seq_cst);
- TriggerSuspend();
-}
-
-inline void Thread::DecrementSuspendCount(Thread* self, SuspendReason reason) {
- if (kIsDebugBuild) {
- Locks::thread_suspend_count_lock_->AssertHeld(self);
- if (this != self && !IsSuspended()) {
- Locks::thread_list_lock_->AssertHeld(self);
- }
- }
- if (UNLIKELY(tls32_.suspend_count <= 0)) {
- UnsafeLogFatalForSuspendCount(self, this);
- UNREACHABLE();
- }
- if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
- Locks::user_code_suspension_lock_->AssertHeld(self);
- if (UNLIKELY(tls32_.user_code_suspend_count <= 0)) {
- LOG(ERROR) << "user_code_suspend_count incorrect";
- UnsafeLogFatalForSuspendCount(self, this);
- UNREACHABLE();
+inline bool Thread::ModifySuspendCount(Thread* self,
+ int delta,
+ AtomicInteger* suspend_barrier,
+ SuspendReason reason) {
+ if (delta > 0 && ((gUseReadBarrier && this != self) || suspend_barrier != nullptr)) {
+ // When delta > 0 (requesting a suspend), ModifySuspendCountInternal() may fail either if
+ // active_suspend_barriers is full or we are in the middle of a thread flip. Retry in a loop.
+ while (true) {
+ if (LIKELY(ModifySuspendCountInternal(self, delta, suspend_barrier, reason))) {
+ return true;
+ } else {
+ // Failure means the list of active_suspend_barriers is full or we are in the middle of a
+ // thread flip, we should release the thread_suspend_count_lock_ (to avoid deadlock) and
+ // wait till the target thread has executed Thread::PassActiveSuspendBarriers() or the
+ // flip function. Note that we could not simply wait for the thread to change to a suspended
+ // state, because it might need to run checkpoint function before the state change or
+ // resumes from the resume_cond_, which also needs thread_suspend_count_lock_.
+ //
+ // The list of active_suspend_barriers is very unlikely to be full since more than
+ // kMaxSuspendBarriers threads need to execute SuspendAllInternal() simultaneously, and
+ // target thread stays in kRunnable in the mean time.
+ Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
+ NanoSleep(100000);
+ Locks::thread_suspend_count_lock_->ExclusiveLock(self);
+ }
}
+ } else {
+ return ModifySuspendCountInternal(self, delta, suspend_barrier, reason);
}
-
- --tls32_.suspend_count;
- if (reason == SuspendReason::kForUserCode) {
- --tls32_.user_code_suspend_count;
- }
-
- if (tls32_.suspend_count == 0) {
- AtomicClearFlag(ThreadFlag::kSuspendRequest);
- }
-}
-
-inline void Thread::IncrementSuspendCount(Thread* self) {
- IncrementSuspendCount(self, nullptr, nullptr, SuspendReason::kInternal);
}
inline ShadowFrame* Thread::PushShadowFrame(ShadowFrame* new_top_frame) {
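
The restored `ModifySuspendCount()` wrapper loops on a fallible internal call, dropping `thread_suspend_count_lock_` between attempts so the target thread can pass a barrier or finish its flip function. A generic sketch of that retry-with-lock-released pattern using standard C++ primitives (names are illustrative):

```cpp
#include <chrono>
#include <functional>
#include <mutex>
#include <thread>

// Retry a fallible step that must run with the lock held, releasing the lock
// between attempts so the other thread can do the work that unblocks us
// (pass a suspend barrier, finish a thread flip), then reacquire and retry.
bool RetryWithLockReleased(std::unique_lock<std::mutex>& held,
                           const std::function<bool()>& attempt) {
  while (true) {
    if (attempt()) {
      return true;               // Succeeded while holding the lock.
    }
    held.unlock();               // Avoid deadlock: let the target thread run.
    std::this_thread::sleep_for(std::chrono::microseconds(100));
    held.lock();                 // Reacquire before retrying, as the caller expects.
  }
}

int main() {
  std::mutex suspend_count_lock;
  std::unique_lock<std::mutex> held(suspend_count_lock);
  int failures_left = 3;         // Simulate transient failures (all slots full, mid-flip).
  bool ok = RetryWithLockReleased(held, [&] { return failures_left-- <= 0; });
  return ok ? 0 : 1;
}
```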
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6de88e6973..d233e6f174 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1436,8 +1436,7 @@ uint64_t Thread::GetCpuMicroTime() const {
}
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
-void Thread::UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread)
- NO_THREAD_SAFETY_ANALYSIS {
+static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
LOG(ERROR) << *thread << " suspend count already zero.";
Locks::thread_suspend_count_lock_->Unlock(self);
if (!Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -1455,51 +1454,141 @@ void Thread::UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread)
std::ostringstream ss;
Runtime::Current()->GetThreadList()->Dump(ss);
LOG(FATAL) << ss.str();
- UNREACHABLE();
}
-bool Thread::PassActiveSuspendBarriers() {
- DCHECK_EQ(this, Thread::Current());
- // Grab the suspend_count lock and copy the current set of barriers. Then clear the list and the
- // flag. The IncrementSuspendCount function requires the lock so we prevent a race between setting
+bool Thread::ModifySuspendCountInternal(Thread* self,
+ int delta,
+ AtomicInteger* suspend_barrier,
+ SuspendReason reason) {
+ if (kIsDebugBuild) {
+ DCHECK(delta == -1 || delta == +1)
+ << reason << " " << delta << " " << this;
+ Locks::thread_suspend_count_lock_->AssertHeld(self);
+ if (this != self && !IsSuspended()) {
+ Locks::thread_list_lock_->AssertHeld(self);
+ }
+ }
+ // User code suspensions need to be checked more closely since they originate from code outside of
+ // the runtime's control.
+ if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
+ Locks::user_code_suspension_lock_->AssertHeld(self);
+ if (UNLIKELY(delta + tls32_.user_code_suspend_count < 0)) {
+ LOG(ERROR) << "attempting to modify suspend count in an illegal way.";
+ return false;
+ }
+ }
+ if (UNLIKELY(delta < 0 && tls32_.suspend_count <= 0)) {
+ UnsafeLogFatalForSuspendCount(self, this);
+ return false;
+ }
+
+ if (gUseReadBarrier && delta > 0 && this != self && tlsPtr_.flip_function != nullptr) {
+ // Force retry of a suspend request if it's in the middle of a thread flip to avoid a
+ // deadlock. b/31683379.
+ return false;
+ }
+
+ uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest);
+ if (delta > 0 && suspend_barrier != nullptr) {
+ uint32_t available_barrier = kMaxSuspendBarriers;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ if (tlsPtr_.active_suspend_barriers[i] == nullptr) {
+ available_barrier = i;
+ break;
+ }
+ }
+ if (available_barrier == kMaxSuspendBarriers) {
+ // No barrier spaces available, we can't add another.
+ return false;
+ }
+ tlsPtr_.active_suspend_barriers[available_barrier] = suspend_barrier;
+ flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
+ }
+
+ tls32_.suspend_count += delta;
+ switch (reason) {
+ case SuspendReason::kForUserCode:
+ tls32_.user_code_suspend_count += delta;
+ break;
+ case SuspendReason::kInternal:
+ break;
+ }
+
+ if (tls32_.suspend_count == 0) {
+ AtomicClearFlag(ThreadFlag::kSuspendRequest);
+ } else {
+ // Two bits might be set simultaneously.
+ tls32_.state_and_flags.fetch_or(flags, std::memory_order_seq_cst);
+ TriggerSuspend();
+ }
+ return true;
+}
+
+bool Thread::PassActiveSuspendBarriers(Thread* self) {
+ // Grab the suspend_count lock and copy the current set of
+ // barriers. Then clear the list and the flag. The ModifySuspendCount
+ // function requires the lock so we prevent a race between setting
// the kActiveSuspendBarrier flag and clearing it.
- std::vector<AtomicInteger*> pass_barriers{};
+ AtomicInteger* pass_barriers[kMaxSuspendBarriers];
{
- MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier)) {
- // quick exit test: the barriers have already been claimed - this is possible as there may
- // be a race to claim and it doesn't matter who wins.
- // All of the callers of this function (except the SuspendAllInternal) will first test the
- // kActiveSuspendBarrier flag without lock. Here double-check whether the barrier has been
- // passed with the suspend_count lock.
+ // quick exit test: the barriers have already been claimed - this is
+ // possible as there may be a race to claim and it doesn't matter
+ // who wins.
+ // All of the callers of this function (except the SuspendAllInternal)
+ // will first test the kActiveSuspendBarrier flag without lock. Here
+ // double-check whether the barrier has been passed with the
+ // suspend_count lock.
return false;
}
- if (tlsPtr_.active_suspendall_barrier != nullptr) {
- pass_barriers.push_back(tlsPtr_.active_suspendall_barrier);
- tlsPtr_.active_suspendall_barrier = nullptr;
- }
- for (WrappedSuspend1Barrier* w = tlsPtr_.active_suspend1_barriers; w != nullptr; w = w->next_) {
- pass_barriers.push_back(&(w->barrier_));
+
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ pass_barriers[i] = tlsPtr_.active_suspend_barriers[i];
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
}
- tlsPtr_.active_suspend1_barriers = nullptr;
AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
}
uint32_t barrier_count = 0;
- for (AtomicInteger* barrier : pass_barriers) {
- ++barrier_count;
- int32_t old_val = barrier->fetch_sub(1, std::memory_order_release);
- CHECK_GT(old_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << old_val;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; i++) {
+ AtomicInteger* pending_threads = pass_barriers[i];
+ if (pending_threads != nullptr) {
+ bool done = false;
+ do {
+ int32_t cur_val = pending_threads->load(std::memory_order_relaxed);
+ CHECK_GT(cur_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << cur_val;
+ // Reduce value by 1.
+ done = pending_threads->CompareAndSetWeakRelaxed(cur_val, cur_val - 1);
#if ART_USE_FUTEXES
- if (old_val == 1) {
- futex(barrier->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
- }
+ if (done && (cur_val - 1) == 0) { // Weak CAS may fail spuriously.
+ futex(pending_threads->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
+ }
#endif
+ } while (!done);
+ ++barrier_count;
+ }
}
CHECK_GT(barrier_count, 0U);
return true;
}
+void Thread::ClearSuspendBarrier(AtomicInteger* target) {
+ CHECK(ReadFlag(ThreadFlag::kActiveSuspendBarrier));
+ bool clear_flag = true;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ AtomicInteger* ptr = tlsPtr_.active_suspend_barriers[i];
+ if (ptr == target) {
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
+ } else if (ptr != nullptr) {
+ clear_flag = false;
+ }
+ }
+ if (LIKELY(clear_flag)) {
+ AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+ }
+}
+
void Thread::RunCheckpointFunction() {
// If this thread is suspended and another thread is running the checkpoint on its behalf,
// we may have a pending flip function that we need to run for the sake of those checkpoints
@@ -1551,25 +1640,28 @@ void Thread::RunEmptyCheckpoint() {
}
bool Thread::RequestCheckpoint(Closure* function) {
- StateAndFlags old_state_and_flags(0 /* unused */), new_state_and_flags(0 /* unused */);
- do {
- old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
- if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
- return false; // Fail, thread is suspended and so can't run a checkpoint.
- }
- new_state_and_flags = old_state_and_flags;
- new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
- } while (!tls32_.state_and_flags.CompareAndSetWeakSequentiallyConsistent(
- old_state_and_flags.GetValue(), new_state_and_flags.GetValue()));
- // Succeeded setting checkpoint flag, now insert the actual checkpoint.
- if (tlsPtr_.checkpoint_function == nullptr) {
- tlsPtr_.checkpoint_function = function;
- } else {
- checkpoint_overflow_.push_back(function);
+ StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
+ return false; // Fail, thread is suspended and so can't run a checkpoint.
}
- DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
- TriggerSuspend();
- return true;
+
+ // We must be runnable to request a checkpoint.
+ DCHECK_EQ(old_state_and_flags.GetState(), ThreadState::kRunnable);
+ StateAndFlags new_state_and_flags = old_state_and_flags;
+ new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
+ bool success = tls32_.state_and_flags.CompareAndSetStrongSequentiallyConsistent(
+ old_state_and_flags.GetValue(), new_state_and_flags.GetValue());
+ if (success) {
+ // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+ if (tlsPtr_.checkpoint_function == nullptr) {
+ tlsPtr_.checkpoint_function = function;
+ } else {
+ checkpoint_overflow_.push_back(function);
+ }
+ CHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
+ TriggerSuspend();
+ }
+ return success;
}
bool Thread::RequestEmptyCheckpoint() {
@@ -1648,85 +1740,85 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
Thread* self_thread;
};
- Locks::thread_list_lock_->AssertExclusiveHeld(self);
- // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
- // suspend-count lock for too long.
- if (GetState() == ThreadState::kRunnable) {
- BarrierClosure barrier_closure(function);
- bool installed = false;
- {
- MutexLock mu(self, *Locks::thread_suspend_count_lock_);
- installed = RequestCheckpoint(&barrier_closure);
- }
- if (installed) {
- // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
- // reacquire it since we don't know if 'this' hasn't been deleted yet.
- Locks::thread_list_lock_->ExclusiveUnlock(self);
- ScopedThreadStateChange sts(self, suspend_state);
- barrier_closure.Wait(self, suspend_state);
- return true;
+ for (;;) {
+ Locks::thread_list_lock_->AssertExclusiveHeld(self);
+ // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
+ // suspend-count lock for too long.
+ if (GetState() == ThreadState::kRunnable) {
+ BarrierClosure barrier_closure(function);
+ bool installed = false;
+ {
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+ installed = RequestCheckpoint(&barrier_closure);
+ }
+ if (installed) {
+ // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
+ // reacquire it since we don't know if 'this' hasn't been deleted yet.
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
+ ScopedThreadStateChange sts(self, suspend_state);
+ barrier_closure.Wait(self, suspend_state);
+ return true;
+ }
+ // Fall-through.
}
- // No longer runnable. Fall-through.
- }
- // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
- // Note: IncrementSuspendCount also expects the thread_list_lock to be held in
- // certain situations.
- while (true) {
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
+ // Note: ModifySuspendCountInternal also expects the thread_list_lock to be held in
+ // certain situations.
+ {
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- if (GetFlipFunction() == nullptr) {
- IncrementSuspendCount(self);
- break;
+ if (!ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal)) {
+ // Just retry the loop.
+ sched_yield();
+ continue;
+ }
}
- DCHECK(gUseReadBarrier); // Flip functions are only used by CC collector.
- sched_yield();
- }
- {
- // Release for the wait. The suspension will keep the target thread from being unregistered
- // and deleted. Reacquire after so that we can call DecrementSuspendCount without racing
- // against ThreadList::Unregister.
- ScopedThreadListLockUnlock stllu(self);
{
- ScopedThreadStateChange sts(self, suspend_state);
- while (GetState() == ThreadState::kRunnable) {
- // The target thread became runnable again. Wait till the suspend triggered in
- // IncrementSuspendCount moves the target thread to suspended.
- sched_yield();
+ // Release for the wait. The suspension will keep us from being deleted. Reacquire after so
+ // that we can call ModifySuspendCount without racing against ThreadList::Unregister.
+ ScopedThreadListLockUnlock stllu(self);
+ {
+ ScopedThreadStateChange sts(self, suspend_state);
+ while (GetState() == ThreadState::kRunnable) {
+ // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
+ // moves us to suspended.
+ sched_yield();
+ }
}
- }
- function->Run(this);
- }
+ function->Run(this);
+ }
- {
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ {
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- // Target was in a suspended state with a nonzero suspend count, so it must stay suspended.
- DCHECK_NE(GetState(), ThreadState::kRunnable);
- DecrementSuspendCount(self);
- }
+ DCHECK_NE(GetState(), ThreadState::kRunnable);
+ bool updated = ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
+ }
- {
- // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
- // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- Thread::resume_cond_->Broadcast(self);
- }
+ {
+ // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
+ // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ Thread::resume_cond_->Broadcast(self);
+ }
- // Release the thread_list_lock_ to be consistent with the barrier-closure path.
- Locks::thread_list_lock_->ExclusiveUnlock(self);
+ // Release the thread_list_lock_ to be consistent with the barrier-closure path.
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
- return true; // We're done, break out of the loop.
+ return true; // We're done, break out of the loop.
+ }
}
void Thread::SetFlipFunction(Closure* function) {
// This is called with all threads suspended, except for the calling thread.
DCHECK(IsSuspended() || Thread::Current() == this);
DCHECK(function != nullptr);
- DCHECK(GetFlipFunction() == nullptr);
- tlsPtr_.flip_function.store(function, std::memory_order_relaxed);
+ DCHECK(tlsPtr_.flip_function == nullptr);
+ tlsPtr_.flip_function = function;
DCHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
AtomicSetFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_release);
}
@@ -1758,8 +1850,8 @@ void Thread::RunFlipFunction(Thread* self, bool notify) {
// no other thread may run checkpoints on a thread that's actually Runnable.
DCHECK_EQ(notify, ReadFlag(ThreadFlag::kRunningFlipFunction));
- Closure* flip_function = GetFlipFunction();
- tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+ Closure* flip_function = tlsPtr_.flip_function;
+ tlsPtr_.flip_function = nullptr;
DCHECK(flip_function != nullptr);
flip_function->Run(this);
@@ -2354,9 +2446,10 @@ Thread::Thread(bool daemon)
tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
gc::allocator::RosAlloc::GetDedicatedFullRun());
tlsPtr_.checkpoint_function = nullptr;
- tlsPtr_.active_suspendall_barrier = nullptr;
- tlsPtr_.active_suspend1_barriers = nullptr;
- tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
+ }
+ tlsPtr_.flip_function = nullptr;
tlsPtr_.thread_local_mark_stack = nullptr;
tls32_.is_transitioning_to_runnable = false;
ResetTlab();
@@ -2504,7 +2597,7 @@ Thread::~Thread() {
CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest));
CHECK(tlsPtr_.checkpoint_function == nullptr);
CHECK_EQ(checkpoint_overflow_.size(), 0u);
- CHECK(GetFlipFunction() == nullptr);
+ CHECK(tlsPtr_.flip_function == nullptr);
CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
// Make sure we processed all deoptimization requests.
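
The restored `PassActiveSuspendBarriers()` decrements each pending barrier with a weak-CAS loop and wakes the waiting requester when a counter reaches zero. A sketch of that decrement-and-wake protocol, using C++20 `std::atomic` wait/notify as a portable stand-in for the futex calls in the hunk above:

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Target side: decrement a pending-threads counter with a weak-CAS loop and
// wake the waiter when this decrement brings the counter to zero. ART wakes
// the requester via futex(FUTEX_WAKE_PRIVATE); notify_all() stands in here.
void PassBarrier(std::atomic<int32_t>& pending_threads) {
  while (true) {
    int32_t cur_val = pending_threads.load(std::memory_order_relaxed);
    assert(cur_val > 0);
    // Weak CAS may fail spuriously, hence the loop.
    if (pending_threads.compare_exchange_weak(cur_val, cur_val - 1,
                                              std::memory_order_release,
                                              std::memory_order_relaxed)) {
      if (cur_val - 1 == 0) {
        pending_threads.notify_all();  // Last passer wakes the requester.
      }
      return;
    }
  }
}

// Requester side: block until every target has passed the barrier.
void AwaitBarrier(std::atomic<int32_t>& pending_threads) {
  int32_t cur = pending_threads.load(std::memory_order_acquire);
  while (cur > 0) {
    pending_threads.wait(cur, std::memory_order_acquire);  // Returns when value != cur.
    cur = pending_threads.load(std::memory_order_acquire);
  }
}

int main() {
  std::atomic<int32_t> pending_threads{1};
  PassBarrier(pending_threads);   // The single target passes immediately...
  AwaitBarrier(pending_threads);  // ...so the requester does not block.
  return 0;
}
```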
diff --git a/runtime/thread.h b/runtime/thread.h
index d979cce6c1..6b1c16c907 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -46,7 +46,6 @@
#include "reflective_handle_scope.h"
#include "runtime_globals.h"
#include "runtime_stats.h"
-#include "suspend_reason.h"
#include "thread_state.h"
namespace unwindstack {
@@ -102,6 +101,7 @@ class RootVisitor;
class ScopedObjectAccessAlreadyRunnable;
class ShadowFrame;
class StackedShadowFrameRecord;
+enum class SuspendReason : char;
class Thread;
class ThreadList;
enum VisitRootFlags : uint8_t;
@@ -133,7 +133,6 @@ enum class ThreadFlag : uint32_t {
kEmptyCheckpointRequest = 1u << 2,
// Register that at least 1 suspend barrier needs to be passed.
- // Changes to this flag are guarded by suspend_count_lock_ .
kActiveSuspendBarrier = 1u << 3,
// Marks that a "flip function" needs to be executed on this thread.
@@ -141,7 +140,7 @@ enum class ThreadFlag : uint32_t {
// Marks that the "flip function" is being executed by another thread.
//
- // This is used to guard against multiple threads trying to run the
+ // This is used to guards against multiple threads trying to run the
// "flip function" for the same thread while the thread is suspended.
//
// This is not needed when the thread is running the flip function
@@ -188,13 +187,6 @@ enum class WeakRefAccessState : int32_t {
kDisabled
};
-// See Thread.tlsPtr_.active_suspend1_barriers below for explanation.
-struct WrappedSuspend1Barrier {
- WrappedSuspend1Barrier() : barrier_(1), next_(nullptr) {}
- AtomicInteger barrier_;
- struct WrappedSuspend1Barrier* next_ GUARDED_BY(Locks::thread_suspend_count_lock_);
-};
-
// This should match RosAlloc::kNumThreadLocalSizeBrackets.
static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16;
@@ -314,7 +306,7 @@ class Thread {
}
bool IsSuspended() const {
- StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_acquire);
+ StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
return state_and_flags.GetState() != ThreadState::kRunnable &&
state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest);
}
@@ -330,32 +322,20 @@ class Thread {
return tls32_.define_class_counter;
}
- // Increment suspend count and optionally install at most one suspend barrier.
- // Should not be invoked on another thread while the target's flip_function is not null.
+ // If delta > 0 and (this != self or suspend_barrier is not null), this function may temporarily
+ // release thread_suspend_count_lock_ internally.
ALWAYS_INLINE
- void IncrementSuspendCount(Thread* self,
- AtomicInteger* suspend_all_barrier,
- WrappedSuspend1Barrier* suspend1_barrier,
- SuspendReason reason)
- REQUIRES(Locks::thread_suspend_count_lock_);
-
- // The same, but default reason to kInternal, and barriers to nullptr.
- ALWAYS_INLINE void IncrementSuspendCount(Thread* self)
+ bool ModifySuspendCount(Thread* self,
+ int delta,
+ AtomicInteger* suspend_barrier,
+ SuspendReason reason)
+ WARN_UNUSED
REQUIRES(Locks::thread_suspend_count_lock_);
- ALWAYS_INLINE void DecrementSuspendCount(Thread* self,
- SuspendReason reason = SuspendReason::kInternal)
- REQUIRES(Locks::thread_suspend_count_lock_);
-
- private:
- NO_RETURN static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread);
-
- public:
// Requests a checkpoint closure to run on another thread. The closure will be run when the
// thread notices the request, either in an explicit runtime CheckSuspend() call, or in a call
// originating from a compiler generated suspend point check. This returns true if the closure
- // was added and will (eventually) be executed. It returns false if the thread was found to be
- // runnable.
+ // was added and will (eventually) be executed. It returns false otherwise.
//
// Since multiple closures can be queued and some closures can delay other threads from running,
// no closure should attempt to suspend another thread while running.
@@ -385,14 +365,8 @@ class Thread {
bool RequestEmptyCheckpoint()
REQUIRES(Locks::thread_suspend_count_lock_);
- Closure* GetFlipFunction() {
- return tlsPtr_.flip_function.load(std::memory_order_relaxed);
- }
-
// Set the flip function. This is done with all threads suspended, except for the calling thread.
- void SetFlipFunction(Closure* function)
- REQUIRES(Locks::thread_suspend_count_lock_)
- REQUIRES(Locks::thread_list_lock_);
+ void SetFlipFunction(Closure* function);
// Ensure that thread flip function started running. If no other thread is executing
// it, the calling thread shall run the flip function and then notify other threads
@@ -1229,6 +1203,9 @@ class Thread {
tlsPtr_.held_mutexes[level] = mutex;
}
+ void ClearSuspendBarrier(AtomicInteger* target)
+ REQUIRES(Locks::thread_suspend_count_lock_);
+
bool ReadFlag(ThreadFlag flag) const {
return GetStateAndFlags(std::memory_order_relaxed).IsFlagSet(flag);
}
@@ -1474,7 +1451,6 @@ class Thread {
private:
explicit Thread(bool daemon);
- // A runnable thread should only be deleted from the thread itself.
~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
void Destroy();
@@ -1502,8 +1478,7 @@ class Thread {
// Avoid use, callers should use SetState.
// Used only by `Thread` destructor and stack trace collection in semi-space GC (currently
- // disabled by `kStoreStackTraces = false`). May not be called on a runnable thread other
- // than Thread::Current().
+ // disabled by `kStoreStackTraces = false`).
// NO_THREAD_SAFETY_ANALYSIS: This function is "Unsafe" and can be called in
// different states, so clang cannot perform the thread safety analysis.
ThreadState SetStateUnsafe(ThreadState new_state) NO_THREAD_SAFETY_ANALYSIS {
@@ -1512,13 +1487,12 @@ class Thread {
if (old_state == new_state) {
// Nothing to do.
} else if (old_state == ThreadState::kRunnable) {
- DCHECK_EQ(this, Thread::Current());
// Need to run pending checkpoint and suspend barriers. Run checkpoints in runnable state in
// case they need to use a ScopedObjectAccess. If we are holding the mutator lock and a SOA
// attempts to TransitionFromSuspendedToRunnable, it results in a deadlock.
TransitionToSuspendedAndRunCheckpoints(new_state);
// Since we transitioned to a suspended state, check the pass barrier requests.
- CheckActiveSuspendBarriers();
+ PassActiveSuspendBarriers();
} else {
while (true) {
StateAndFlags new_state_and_flags = old_state_and_flags;
@@ -1591,26 +1565,9 @@ class Thread {
REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_)
REQUIRES_SHARED(Locks::mutator_lock_);
- // Call PassActiveSuspendBarriers() if there are active barriers. Only called on current thread.
- ALWAYS_INLINE void CheckActiveSuspendBarriers()
+ ALWAYS_INLINE void PassActiveSuspendBarriers()
REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);
- // Decrement all "suspend barriers" for the current thread, notifying threads that requested our
- // suspension. Only called on current thread.
- bool PassActiveSuspendBarriers()
- REQUIRES(!Locks::thread_suspend_count_lock_);
-
- // Add an entry to active_suspend1_barriers.
- ALWAYS_INLINE void AddSuspend1Barrier(WrappedSuspend1Barrier* suspend1_barrier)
- REQUIRES(Locks::thread_suspend_count_lock_);
-
- // Remove last-added entry from active_suspend1_barriers.
- // Only makes sense if we're still holding thread_suspend_count_lock_ since insertion.
- ALWAYS_INLINE void RemoveFirstSuspend1Barrier()
- REQUIRES(Locks::thread_suspend_count_lock_);
-
- ALWAYS_INLINE bool HasActiveSuspendBarrier() REQUIRES(Locks::thread_suspend_count_lock_);
-
// Registers the current thread as the jit sensitive thread. Should be called just once.
static void SetJitSensitiveThread() {
if (jit_sensitive_thread_ == nullptr) {
@@ -1625,6 +1582,13 @@ class Thread {
is_sensitive_thread_hook_ = is_sensitive_thread_hook;
}
+ bool ModifySuspendCountInternal(Thread* self,
+ int delta,
+ AtomicInteger* suspend_barrier,
+ SuspendReason reason)
+ WARN_UNUSED
+ REQUIRES(Locks::thread_suspend_count_lock_);
+
// Runs a single checkpoint function. If there are no more pending checkpoint functions it will
// clear the kCheckpointRequest flag. The caller is responsible for calling this in a loop until
// the kCheckpointRequest flag is cleared.
@@ -1633,6 +1597,9 @@ class Thread {
REQUIRES_SHARED(Locks::mutator_lock_);
void RunEmptyCheckpoint();
+ bool PassActiveSuspendBarriers(Thread* self)
+ REQUIRES(!Locks::thread_suspend_count_lock_);
+
// Install the protected region for implicit stack checks.
void InstallImplicitProtection();
@@ -1918,47 +1885,45 @@ class Thread {
} tls64_;
struct PACKED(sizeof(void*)) tls_ptr_sized_values {
- tls_ptr_sized_values() : card_table(nullptr),
- exception(nullptr),
- stack_end(nullptr),
- managed_stack(),
- suspend_trigger(nullptr),
- jni_env(nullptr),
- tmp_jni_env(nullptr),
- self(nullptr),
- opeer(nullptr),
- jpeer(nullptr),
- stack_begin(nullptr),
- stack_size(0),
- deps_or_stack_trace_sample(),
- wait_next(nullptr),
- monitor_enter_object(nullptr),
- top_handle_scope(nullptr),
- class_loader_override(nullptr),
- long_jump_context(nullptr),
- instrumentation_stack(nullptr),
- stacked_shadow_frame_record(nullptr),
- deoptimization_context_stack(nullptr),
- frame_id_to_shadow_frame(nullptr),
- name(nullptr),
- pthread_self(0),
- last_no_thread_suspension_cause(nullptr),
- checkpoint_function(nullptr),
- active_suspendall_barrier(nullptr),
- active_suspend1_barriers(nullptr),
- thread_local_pos(nullptr),
- thread_local_end(nullptr),
- thread_local_start(nullptr),
- thread_local_limit(nullptr),
- thread_local_objects(0),
- thread_local_alloc_stack_top(nullptr),
- thread_local_alloc_stack_end(nullptr),
- mutator_lock(nullptr),
- flip_function(nullptr),
- method_verifier(nullptr),
- thread_local_mark_stack(nullptr),
- async_exception(nullptr),
- top_reflective_handle_scope(nullptr) {
+ tls_ptr_sized_values() : card_table(nullptr),
+ exception(nullptr),
+ stack_end(nullptr),
+ managed_stack(),
+ suspend_trigger(nullptr),
+ jni_env(nullptr),
+ tmp_jni_env(nullptr),
+ self(nullptr),
+ opeer(nullptr),
+ jpeer(nullptr),
+ stack_begin(nullptr),
+ stack_size(0),
+ deps_or_stack_trace_sample(),
+ wait_next(nullptr),
+ monitor_enter_object(nullptr),
+ top_handle_scope(nullptr),
+ class_loader_override(nullptr),
+ long_jump_context(nullptr),
+ instrumentation_stack(nullptr),
+ stacked_shadow_frame_record(nullptr),
+ deoptimization_context_stack(nullptr),
+ frame_id_to_shadow_frame(nullptr),
+ name(nullptr),
+ pthread_self(0),
+ last_no_thread_suspension_cause(nullptr),
+ checkpoint_function(nullptr),
+ thread_local_start(nullptr),
+ thread_local_pos(nullptr),
+ thread_local_end(nullptr),
+ thread_local_limit(nullptr),
+ thread_local_objects(0),
+ thread_local_alloc_stack_top(nullptr),
+ thread_local_alloc_stack_end(nullptr),
+ mutator_lock(nullptr),
+ flip_function(nullptr),
+ method_verifier(nullptr),
+ thread_local_mark_stack(nullptr),
+ async_exception(nullptr),
+ top_reflective_handle_scope(nullptr) {
std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
}
@@ -2068,30 +2033,20 @@ class Thread {
// requests another checkpoint, it goes to the checkpoint overflow list.
Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);
- // After a thread observes a suspend request and enters a suspended state,
- // it notifies the requestor by arriving at a "suspend barrier". This consists of decrementing
- // the atomic integer representing the barrier. (This implementation was introduced in 2015 to
- // minimize cost. There may be other options.) These atomic integer barriers are always
- // stored on the requesting thread's stack. They are referenced from the target thread's
- // data structure in one of two ways; in either case the data structure referring to these
- // barriers is guarded by suspend_count_lock:
- // 1. A SuspendAll barrier is directly referenced from the target thread. Only one of these
- // can be active at a time:
- AtomicInteger* active_suspendall_barrier GUARDED_BY(Locks::thread_suspend_count_lock_);
- // 2. For individual thread suspensions, active barriers are embedded in a struct that is used
- // to link together all suspend requests for this thread. Unlike the SuspendAll case, each
- // barrier is referenced by a single target thread, and thus can appear only on a single list.
- // The struct as a whole is still stored on the requesting thread's stack.
- WrappedSuspend1Barrier* active_suspend1_barriers GUARDED_BY(Locks::thread_suspend_count_lock_);
+ // Pending barriers that require passing or NULL if non-pending. Installation guarding by
+ // Locks::thread_suspend_count_lock_.
+ // They work effectively as art::Barrier, but implemented directly using AtomicInteger and futex
+ // to avoid additional cost of a mutex and a condition variable, as used in art::Barrier.
+ AtomicInteger* active_suspend_barriers[kMaxSuspendBarriers];
+
+ // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM.
+ uint8_t* thread_local_start;
// thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
// potentially better performance.
uint8_t* thread_local_pos;
uint8_t* thread_local_end;
- // Thread-local allocation pointer. Can be moved above the preceding two to correct alignment.
- uint8_t* thread_local_start;
-
// Thread local limit is how much we can expand the thread local buffer to, it is greater or
// equal to thread_local_end.
uint8_t* thread_local_limit;
@@ -2118,8 +2073,7 @@ class Thread {
BaseMutex* held_mutexes[kLockLevelCount];
// The function used for thread flip.
- // Set only while holding Locks::thread_suspend_count_lock_ . May be cleared while being read.
- std::atomic<Closure*> flip_function;
+ Closure* flip_function;
// Current method verifier, used for root marking.
verifier::MethodVerifier* method_verifier;
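
To make the data-structure change concrete: the revert swaps an intrusive list of per-request barrier nodes (plus a dedicated SuspendAll pointer) back to a fixed-size array of barrier pointers per thread. A simplified side-by-side sketch of the two per-thread layouts (standard C++; the `kMaxSuspendBarriers` value is illustrative):

```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>

using AtomicInteger = std::atomic<int32_t>;

// Restored layout (this revert): a small fixed-size array of pending barrier
// pointers per thread; a suspend request retries if every slot is occupied.
constexpr size_t kMaxSuspendBarriers = 3;  // illustrative; the real constant is in thread.h
struct TlsWithArray {
  AtomicInteger* active_suspend_barriers[kMaxSuspendBarriers] = {};
};

// Reverted layout (the CL being backed out): one dedicated SuspendAll barrier
// pointer plus an intrusive singly linked list of per-request nodes, each node
// allocated on the requesting thread's stack.
struct WrappedSuspend1Barrier {
  AtomicInteger barrier_{1};
  WrappedSuspend1Barrier* next_ = nullptr;
};
struct TlsWithList {
  AtomicInteger* active_suspendall_barrier = nullptr;
  WrappedSuspend1Barrier* active_suspend1_barriers = nullptr;
};

int main() {
  // Array-based scheme: install a SuspendAll barrier in a free slot.
  TlsWithArray with_array;
  AtomicInteger pending{1};
  with_array.active_suspend_barriers[0] = &pending;

  // List-based scheme: push a per-thread suspend request node.
  TlsWithList with_list;
  WrappedSuspend1Barrier node;
  node.next_ = with_list.active_suspend1_barriers;
  with_list.active_suspend1_barriers = &node;
  return 0;
}
```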
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 77e373d7de..6b23c349ef 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -62,7 +62,10 @@ namespace art {
using android::base::StringPrintf;
static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
-static constexpr useconds_t kThreadSuspendSleepUs = 1000;
+// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
+static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
+static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
+static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
@@ -71,7 +74,7 @@ static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;
ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
: suspend_all_count_(0),
unregistering_count_(0),
- suspend_all_histogram_("suspend all histogram", 16, 64),
+ suspend_all_historam_("suspend all histogram", 16, 64),
long_suspend_(false),
shut_down_(false),
thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
@@ -132,10 +135,10 @@ void ThreadList::DumpForSigQuit(std::ostream& os) {
{
ScopedObjectAccess soa(Thread::Current());
// Only print if we have samples.
- if (suspend_all_histogram_.SampleSize() > 0) {
+ if (suspend_all_historam_.SampleSize() > 0) {
Histogram<uint64_t>::CumulativeData data;
- suspend_all_histogram_.CreateHistogram(&data);
- suspend_all_histogram_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend.
+ suspend_all_historam_.CreateHistogram(&data);
+ suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend.
}
}
bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
@@ -257,11 +260,11 @@ void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
}
}
-void ThreadList::AssertOtherThreadsAreSuspended(Thread* self) {
+void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : list_) {
- if (thread != self) {
+ if (thread != ignore1 && thread != ignore2) {
CHECK(thread->IsSuspended())
<< "\nUnsuspended thread: <<" << *thread << "\n"
<< "self: <<" << *Thread::Current();
@@ -288,6 +291,19 @@ NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
}
#endif
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then
+// we use sched_yield instead of calling usleep.
+// Although there is the possibility, here and elsewhere, that usleep could return -1 and
+// errno = EINTR, there should be no problem if interrupted, so we do not check.
+static void ThreadSuspendSleep(useconds_t delay_us) {
+ if (delay_us == 0) {
+ sched_yield();
+ } else {
+ usleep(delay_us);
+ }
+}
+
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
Thread* self = Thread::Current();
Locks::mutator_lock_->AssertNotExclusiveHeld(self);
@@ -310,7 +326,9 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
// This thread will run its checkpoint some time in the near future.
if (requested_suspend) {
// The suspend request is now unnecessary.
- thread->DecrementSuspendCount(self);
+ bool updated =
+ thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
requested_suspend = false;
}
break;
@@ -321,9 +339,9 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
continue;
}
if (!requested_suspend) {
- // Since we don't suspend other threads to run checkpoint_function, we claim this is
- // safe even with flip_function set.
- thread->IncrementSuspendCount(self);
+ bool updated =
+ thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
requested_suspend = true;
if (thread->IsSuspended()) {
break;
@@ -358,7 +376,8 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
checkpoint_function->Run(thread);
{
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- thread->DecrementSuspendCount(self);
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
}
}
@@ -497,14 +516,14 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
// ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
// pause.
const uint64_t suspend_start_time = NanoTime();
- SuspendAllInternal(self);
+ SuspendAllInternal(self, self, nullptr);
if (pause_listener != nullptr) {
pause_listener->StartPause();
}
// Run the flip callback for the collector.
Locks::mutator_lock_->ExclusiveLock(self);
- suspend_all_histogram_.AdjustAndAddValue(NanoTime() - suspend_start_time);
+ suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
flip_callback->Run(self);
Locks::mutator_lock_->ExclusiveUnlock(self);
collector->RegisterPause(NanoTime() - suspend_start_time);
@@ -535,7 +554,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
if ((state == ThreadState::kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
thread->GetSuspendCount() == 1) {
// The thread will resume right after the broadcast.
- thread->DecrementSuspendCount(self);
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
++runnable_thread_count;
} else {
other_threads.push_back(thread);
@@ -564,7 +584,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : other_threads) {
- thread->DecrementSuspendCount(self);
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
}
Thread::resume_cond_->Broadcast(self);
}
@@ -572,32 +593,6 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
return runnable_thread_count + other_threads.size() + 1; // +1 for self.
}
-bool ThreadList::WaitForSuspendBarrier(AtomicInteger* barrier) {
-#if ART_USE_FUTEXES
- timespec wait_timeout;
- InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
-#endif
- while (true) {
- int32_t cur_val = barrier->load(std::memory_order_acquire);
- if (cur_val <= 0) {
- CHECK_EQ(cur_val, 0);
- return true;
- }
-#if ART_USE_FUTEXES
- if (futex(barrier->Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
- != 0) {
- if (errno == ETIMEDOUT) {
- return false;
- } else if (errno != EAGAIN && errno != EINTR) {
- PLOG(FATAL) << "futex wait for suspend barrier failed";
- }
- }
-#endif
- // Else spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on Linux,
- // including all targets.
- }
-}
-
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
Thread* self = Thread::Current();
@@ -610,7 +605,7 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
ScopedTrace trace("Suspending mutator threads");
const uint64_t start_time = NanoTime();
- SuspendAllInternal(self);
+ SuspendAllInternal(self, self);
// All threads are known to have suspended (but a thread may still own the mutator lock)
// Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
@@ -634,14 +629,14 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
const uint64_t end_time = NanoTime();
const uint64_t suspend_time = end_time - start_time;
- suspend_all_histogram_.AdjustAndAddValue(suspend_time);
+ suspend_all_historam_.AdjustAndAddValue(suspend_time);
if (suspend_time > kLongThreadSuspendThreshold) {
LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
}
if (kDebugLocking) {
// Debug check that all threads are suspended.
- AssertOtherThreadsAreSuspended(self);
+ AssertThreadsAreSuspended(self, self);
}
}
ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str());
@@ -654,8 +649,10 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
}
// Ensures all threads running Java suspend and that those not running Java don't start.
-void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
- const uint64_t start_time = NanoTime();
+void ThreadList::SuspendAllInternal(Thread* self,
+ Thread* ignore1,
+ Thread* ignore2,
+ SuspendReason reason) {
Locks::mutator_lock_->AssertNotExclusiveHeld(self);
Locks::thread_list_lock_->AssertNotHeld(self);
Locks::thread_suspend_count_lock_->AssertNotHeld(self);
@@ -673,77 +670,85 @@ void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
// The atomic counter for number of threads that need to pass the barrier.
AtomicInteger pending_threads;
- while (true) {
- {
- MutexLock mu(self, *Locks::thread_list_lock_);
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- bool in_flip = false;
- for (const auto& thread : list_) {
- if (thread->GetFlipFunction() != nullptr) {
- in_flip = true;
- break;
- }
- }
- if (LIKELY(suspend_all_count_ == 0
- && (self == nullptr || self->GetSuspendCount() == 0)
- && !in_flip)) {
- // The above condition remains valid while we hold thread_suspend_count_lock_ .
- bool found_myself = false;
- // Update global suspend all state for attaching threads.
- ++suspend_all_count_;
- pending_threads.store(list_.size() - (self == nullptr ? 0 : 1), std::memory_order_relaxed);
- // Increment everybody else's suspend count.
- for (const auto& thread : list_) {
- if (thread == self) {
- found_myself = true;
- } else {
- VLOG(threads) << "requesting thread suspend: " << *thread;
- DCHECK_EQ(suspend_all_count_, 1);
- thread->IncrementSuspendCount(self, &pending_threads, nullptr, reason);
-
- // Must install the pending_threads counter first, then check thread->IsSuspended()
- // and clear the counter. Otherwise there's a race with
- // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call
- // to PassActiveSuspendBarriers().
- if (thread->IsSuspended()) {
- // Effectively pass the barrier on behalf of the already suspended thread.
- DCHECK_EQ(thread->tlsPtr_.active_suspendall_barrier, &pending_threads);
- pending_threads.fetch_sub(1, std::memory_order_seq_cst);
- thread->tlsPtr_.active_suspendall_barrier = nullptr;
- if (!thread->HasActiveSuspendBarrier()) {
- thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
- }
- }
- }
- }
- DCHECK(self == nullptr || found_myself);
- break;
- }
- }
- // The if (LIKELY ...) condition above didn't hold. This is a bad time to initiate a suspend.
- // Either the suspendall_barrier is already in use, or proceeding at this time risks deadlock.
- // See b/31683379 for an explanation of the thread flip condition. Note that in the event of a
- // competing Suspend or SuspendAll, we are about to be suspended anyway. We hold no locks,
- // so it's safe to sleep and retry.
- VLOG(threads) << "SuspendAll is sleeping";
- usleep(kThreadSuspendSleepUs);
- // We're already not runnable, so an attempt to suspend us should succeed.
+ uint32_t num_ignored = 0;
+ if (ignore1 != nullptr) {
+ ++num_ignored;
}
-
- if (!WaitForSuspendBarrier(&pending_threads)) {
- const uint64_t wait_time = NanoTime() - start_time;
+ if (ignore2 != nullptr && ignore1 != ignore2) {
+ ++num_ignored;
+ }
+ {
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- std::ostringstream oss;
+ // Update global suspend all state for attaching threads.
+ ++suspend_all_count_;
+ pending_threads.store(list_.size() - num_ignored, std::memory_order_relaxed);
+ // Increment everybody's suspend count (except those that should be ignored).
for (const auto& thread : list_) {
- if (thread != self && !thread->IsSuspended()) {
- oss << std::endl << "Thread not suspended: " << *thread;
+ if (thread == ignore1 || thread == ignore2) {
+ continue;
+ }
+ VLOG(threads) << "requesting thread suspend: " << *thread;
+ bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
+ DCHECK(updated);
+
+      // Must install the pending_threads counter first, then check thread->IsSuspended() and clear
+ // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
+ // that can lead a thread to miss a call to PassActiveSuspendBarriers().
+ if (thread->IsSuspended()) {
+        // The thread is already suspended, so pass the barrier on its behalf: clear its
+        // reference to this barrier and decrement the pending count here.
+ thread->ClearSuspendBarrier(&pending_threads);
+ pending_threads.fetch_sub(1, std::memory_order_seq_cst);
}
}
- LOG(::android::base::FATAL)
- << "Timed out waiting for threads to suspend, waited for "
- << PrettyDuration(wait_time)
- << oss.str();
+ }
+
+ // Wait for the barrier to be passed by all runnable threads. This wait
+ // is done with a timeout so that we can detect problems.
+#if ART_USE_FUTEXES
+ timespec wait_timeout;
+ InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
+#endif
+ const uint64_t start_time = NanoTime();
+ while (true) {
+ int32_t cur_val = pending_threads.load(std::memory_order_relaxed);
+ if (LIKELY(cur_val > 0)) {
+#if ART_USE_FUTEXES
+ if (futex(pending_threads.Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
+ != 0) {
+ if ((errno == EAGAIN) || (errno == EINTR)) {
+          // EAGAIN and EINTR both indicate a spurious failure; try again from the beginning.
+ continue;
+ }
+ if (errno == ETIMEDOUT) {
+ const uint64_t wait_time = NanoTime() - start_time;
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ std::ostringstream oss;
+ for (const auto& thread : list_) {
+ if (thread == ignore1 || thread == ignore2) {
+ continue;
+ }
+ if (!thread->IsSuspended()) {
+ oss << std::endl << "Thread not suspended: " << *thread;
+ }
+ }
+ LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
+ << "Timed out waiting for threads to suspend, waited for "
+ << PrettyDuration(wait_time)
+ << oss.str();
+ } else {
+ PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
+ }
+ } // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
+#else
+      // Spin wait. This is likely to be slow, but on most architectures ART_USE_FUTEXES is set.
+ UNUSED(start_time);
+#endif
+ } else {
+ CHECK_EQ(cur_val, 0);
+ break;
+ }
}
}
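// The restored wait loop above blocks on pending_threads with FUTEX_WAIT_PRIVATE and a relative
// CLOCK_MONOTONIC timeout, retrying on EAGAIN/EINTR and reporting unsuspended threads on
// ETIMEDOUT. Below is a standalone sketch of that countdown-barrier wait; the function names and
// the raw syscall() wrapper are illustrative only (ART uses its own futex() helper and
// AtomicInteger), and as in the loop above the full timeout is simply re-armed after a spurious
// wake-up.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>
#include <cstdint>
#include <ctime>

static_assert(sizeof(std::atomic<int32_t>) == sizeof(int32_t), "sketch assumes plain layout");

static long SketchFutexWait(std::atomic<int32_t>* addr, int32_t expected, const timespec* rel) {
  // FUTEX_WAIT takes a timeout relative to now, measured against CLOCK_MONOTONIC.
  return syscall(SYS_futex, reinterpret_cast<int32_t*>(addr), FUTEX_WAIT_PRIVATE, expected, rel,
                 nullptr, 0);
}

// Block until *counter reaches zero; return false if a single wait times out.
static bool SketchWaitForZero(std::atomic<int32_t>* counter, int64_t timeout_ns) {
  timespec rel_timeout;
  rel_timeout.tv_sec = timeout_ns / INT64_C(1000000000);
  rel_timeout.tv_nsec = timeout_ns % INT64_C(1000000000);
  while (true) {
    int32_t cur = counter->load(std::memory_order_acquire);
    if (cur <= 0) {
      return true;  // every waiter has decremented the counter
    }
    if (SketchFutexWait(counter, cur, &rel_timeout) != 0) {
      if (errno == ETIMEDOUT) {
        return false;  // the caller reports which threads never suspended
      }
      // EAGAIN: the value changed before we slept; EINTR: spurious wake-up. Retry either way.
    }
  }
}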
@@ -762,7 +767,7 @@ void ThreadList::ResumeAll() {
if (kDebugLocking) {
// Debug check that all threads are suspended.
- AssertOtherThreadsAreSuspended(self);
+ AssertThreadsAreSuspended(self, self);
}
long_suspend_ = false;
@@ -775,9 +780,11 @@ void ThreadList::ResumeAll() {
--suspend_all_count_;
// Decrement the suspend counts for all threads.
for (const auto& thread : list_) {
- if (thread != self) {
- thread->DecrementSuspendCount(self);
+ if (thread == self) {
+ continue;
}
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
}
// Broadcast a notification to all suspended threads, some or all of
@@ -822,7 +829,11 @@ bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
<< ") thread not within thread list";
return false;
}
- thread->DecrementSuspendCount(self, reason);
+ if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
+ LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
+ << ") could not modify suspend count.";
+ return false;
+ }
}
{
@@ -852,19 +863,40 @@ static void ThreadSuspendByPeerWarning(Thread* self,
}
}
-Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) {
- bool is_suspended = false;
+Thread* ThreadList::SuspendThreadByPeer(jobject peer,
+ SuspendReason reason,
+ bool* timed_out) {
+ bool request_suspension = true;
+ const uint64_t start_time = NanoTime();
+ int self_suspend_count = 0;
+ useconds_t sleep_us = kThreadSuspendInitialSleepUs;
+ *timed_out = false;
Thread* const self = Thread::Current();
+ Thread* suspended_thread = nullptr;
VLOG(threads) << "SuspendThreadByPeer starting";
- Thread* thread;
- WrappedSuspend1Barrier wrapped_barrier{};
while (true) {
+ Thread* thread;
{
- // Note: this will transition to runnable and potentially suspend.
+      // Note: this will transition to runnable and potentially suspend. We ensure that only one
+      // thread requests another thread's suspension, to avoid deadlock, by requiring that this
+      // function be called while holding Locks::thread_list_suspend_thread_lock_. It's important
+      // that this thread suspends rather than requests a suspension, to avoid potential cycles of
+      // threads requesting suspension of each other.
ScopedObjectAccess soa(self);
MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
thread = Thread::FromManagedThread(soa, peer);
if (thread == nullptr) {
+ if (suspended_thread != nullptr) {
+ MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
+ // If we incremented the suspend count but the thread reset its peer, we need to
+ // re-decrement it since it is shutting down and may deadlock the runtime in
+ // ThreadList::WaitForOtherNonDaemonThreadsToExit.
+ bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
+ -1,
+ nullptr,
+ reason);
+ DCHECK(updated);
+ }
ThreadSuspendByPeerWarning(self,
::android::base::WARNING,
"No such thread for suspend",
@@ -872,138 +904,188 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) {
return nullptr;
}
if (!Contains(thread)) {
+ CHECK(suspended_thread == nullptr);
VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
<< reinterpret_cast<void*>(thread);
return nullptr;
}
- // IsSuspended on the current thread will fail as the current thread is changed into
- // Runnable above. As the suspend count is now raised if this is the current thread
- // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
- // to just explicitly handle the current thread in the callers to this code.
- CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
{
MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
- if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
- thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
- if (thread->IsSuspended()) {
- // See the discussion in mutator_gc_coord.md for the race here.
- thread->RemoveFirstSuspend1Barrier();
- if (!thread->HasActiveSuspendBarrier()) {
- thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
- }
- is_suspended = true;
+ if (request_suspension) {
+ if (self->GetSuspendCount() > 0) {
+            // We hold the suspend count lock but another thread is trying to suspend us. It's not
+            // safe to try to suspend another thread, since we could end up in a cycle. Start the
+            // loop again, which will allow this thread to be suspended.
+ ++self_suspend_count;
+ continue;
}
- DCHECK_GT(thread->GetSuspendCount(), 0);
- break;
+ CHECK(suspended_thread == nullptr);
+ suspended_thread = thread;
+ bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
+ DCHECK(updated);
+ request_suspension = false;
+ } else {
+ // If the caller isn't requesting suspension, a suspension should have already occurred.
+ CHECK_GT(thread->GetSuspendCount(), 0);
+ }
+        // IsSuspended on the current thread will fail because the current thread was changed to
+        // Runnable above. Since the suspend count is now raised, if this were the current thread
+        // it would self-suspend on the transition to Runnable, making it hard to work with. It's
+        // simpler to just explicitly handle the current thread in the callers of this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If the thread is suspended (perhaps it was already not Runnable but didn't have a
+        // suspend count, or else we've waited and it has self-suspended), we're done.
+ if (thread->IsSuspended()) {
+ VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
+ if (ATraceEnabled()) {
+ std::string name;
+ thread->GetThreadName(name);
+ ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
+ peer).c_str());
+ }
+ return thread;
+ }
+ const uint64_t total_delay = NanoTime() - start_time;
+ if (total_delay >= thread_suspend_timeout_ns_) {
+ if (suspended_thread == nullptr) {
+ ThreadSuspendByPeerWarning(self,
+ ::android::base::FATAL,
+ "Failed to issue suspend request",
+ peer);
+ } else {
+ CHECK_EQ(suspended_thread, thread);
+ LOG(WARNING) << "Suspended thread state_and_flags: "
+ << suspended_thread->StateAndFlagsAsHexString()
+ << ", self_suspend_count = " << self_suspend_count;
+ // Explicitly release thread_suspend_count_lock_; we haven't held it for long, so
+ // seeing threads blocked on it is not informative.
+ Locks::thread_suspend_count_lock_->Unlock(self);
+ ThreadSuspendByPeerWarning(self,
+ ::android::base::FATAL,
+ "Thread suspension timed out",
+ peer);
+ }
+ UNREACHABLE();
+ } else if (sleep_us == 0 &&
+ total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
+        // We have spun for kThreadSuspendMaxYieldUs; switch to sleeping to prevent excessive
+        // CPU usage.
+ sleep_us = kThreadSuspendMaxYieldUs / 2;
}
- // Else we either hold the suspend count lock but another thread is trying to suspend us,
- // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
- // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
- // case, start the loop again, which will allow this thread to be suspended.
}
+ // Release locks and come out of runnable state.
}
- // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
- VLOG(threads) << "SuspendThreadByPeer is sleeping";
- usleep(kThreadSuspendSleepUs);
- }
- // Now wait for target to decrement suspend barrier.
- if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
- // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
- VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
- if (ATraceEnabled()) {
- std::string name;
- thread->GetThreadName(name);
- ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
- peer).c_str());
- }
- return thread;
- } else {
- LOG(WARNING) << "Suspended thread state_and_flags: "
- << thread->StateAndFlagsAsHexString();
- // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe
- // without additional cleanup.
- ThreadSuspendByPeerWarning(self,
- ::android::base::FATAL,
- "SuspendThreadByPeer timed out",
- peer);
- UNREACHABLE();
+ VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
+ ThreadSuspendSleep(sleep_us);
+    // This may stay at 0 if sleep_us == 0, but that is working as intended, since we want to
+    // avoid using usleep at all if possible. This shouldn't be an issue, since the time to
+    // suspend should always be small.
+ sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
}
}
-
static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
const char* message,
uint32_t thread_id) {
LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}
-Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason) {
- bool is_suspended = false;
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
+ SuspendReason reason,
+ bool* timed_out) {
+ const uint64_t start_time = NanoTime();
+ useconds_t sleep_us = kThreadSuspendInitialSleepUs;
+ *timed_out = false;
+ Thread* suspended_thread = nullptr;
Thread* const self = Thread::Current();
CHECK_NE(thread_id, kInvalidThreadId);
VLOG(threads) << "SuspendThreadByThreadId starting";
- Thread* thread;
- WrappedSuspend1Barrier wrapped_barrier{};
while (true) {
{
- // Note: this will transition to runnable and potentially suspend.
+      // Note: this will transition to runnable and potentially suspend. We ensure that only one
+      // thread requests another thread's suspension, to avoid deadlock, by requiring that this
+      // function be called while holding Locks::thread_list_suspend_thread_lock_. It's important
+      // that this thread suspends rather than requests a suspension, to avoid potential cycles of
+      // threads requesting suspension of each other.
ScopedObjectAccess soa(self);
MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
- thread = FindThreadByThreadId(thread_id);
+ Thread* thread = nullptr;
+ for (const auto& it : list_) {
+ if (it->GetThreadId() == thread_id) {
+ thread = it;
+ break;
+ }
+ }
if (thread == nullptr) {
+ CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
+ << " no longer in thread list";
// There's a race in inflating a lock and the owner giving up ownership and then dying.
ThreadSuspendByThreadIdWarning(::android::base::WARNING,
"No such thread id for suspend",
thread_id);
return nullptr;
}
- DCHECK(Contains(thread));
- CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
+ DCHECK(Contains(thread));
{
MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
- if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
- thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
- if (thread->IsSuspended()) {
- // See the discussion in mutator_gc_coord.md for the race here.
- thread->RemoveFirstSuspend1Barrier();
- if (!thread->HasActiveSuspendBarrier()) {
- thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
- }
- is_suspended = true;
+ if (suspended_thread == nullptr) {
+ if (self->GetSuspendCount() > 0) {
+            // We hold the suspend count lock but another thread is trying to suspend us. It's not
+            // safe to try to suspend another thread, since we could end up in a cycle. Start the
+            // loop again, which will allow this thread to be suspended.
+ continue;
}
- DCHECK_GT(thread->GetSuspendCount(), 0);
- break;
+ bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
+ DCHECK(updated);
+ suspended_thread = thread;
+ } else {
+ CHECK_EQ(suspended_thread, thread);
+ // If the caller isn't requesting suspension, a suspension should have already occurred.
+ CHECK_GT(thread->GetSuspendCount(), 0);
+ }
+        // IsSuspended on the current thread will fail because the current thread was changed to
+        // Runnable above. Since the suspend count is now raised, if this were the current thread
+        // it would self-suspend on the transition to Runnable, making it hard to work with. It's
+        // simpler to just explicitly handle the current thread in the callers of this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If the thread is suspended (perhaps it was already not Runnable but didn't have a
+        // suspend count, or else we've waited and it has self-suspended), we're done.
+ if (thread->IsSuspended()) {
+ if (ATraceEnabled()) {
+ std::string name;
+ thread->GetThreadName(name);
+ ATraceBegin(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
+ name.c_str(), thread_id).c_str());
+ }
+ VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
+ return thread;
+ }
+ const uint64_t total_delay = NanoTime() - start_time;
+ if (total_delay >= thread_suspend_timeout_ns_) {
+ ThreadSuspendByThreadIdWarning(::android::base::WARNING,
+ "Thread suspension timed out",
+ thread_id);
+ if (suspended_thread != nullptr) {
+ bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
+ DCHECK(updated);
+ }
+ *timed_out = true;
+ return nullptr;
+ } else if (sleep_us == 0 &&
+ total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
+        // We have spun for kThreadSuspendMaxYieldUs; switch to sleeping to prevent excessive
+        // CPU usage.
+ sleep_us = kThreadSuspendMaxYieldUs / 2;
}
- // Else we either hold the suspend count lock but another thread is trying to suspend us,
- // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
- // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
- // case, start the loop again, which will allow this thread to be suspended.
}
+ // Release locks and come out of runnable state.
}
- // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
- VLOG(threads) << "SuspendThreadByThreadId is sleeping";
- usleep(kThreadSuspendSleepUs);
- }
- // Now wait for target to decrement suspend barrier.
- if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
- // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
- VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
- if (ATraceEnabled()) {
- std::string name;
- thread->GetThreadName(name);
- ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for id=%d",
- name.c_str(), thread_id).c_str());
- }
- return thread;
- } else {
- // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe without
- // additional cleanup.
- ThreadSuspendByThreadIdWarning(::android::base::FATAL,
- "SuspendThreadByThreadId timed out",
- thread_id);
- UNREACHABLE();
+ VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
+ ThreadSuspendSleep(sleep_us);
+ sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
}
}
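// Both SuspendThreadByPeer() and SuspendThreadByThreadId() above apply the same cycle-avoidance
// rule described in their comments: a requester never raises another thread's suspend count while
// its own suspend count is non-zero; it backs off, lets itself be suspended, and retries. A tiny
// self-contained model of just that rule (SketchThread and SketchTryRequestSuspend are
// illustrative names, not ART types):
#include <atomic>

struct SketchThread {
  std::atomic<int> suspend_count{0};
};

// Returns true if the suspend request was issued; false means the requester must drop its locks,
// allow itself to be suspended, and retry, so two threads suspending each other cannot deadlock.
static bool SketchTryRequestSuspend(SketchThread& self, SketchThread& target) {
  if (self.suspend_count.load(std::memory_order_relaxed) > 0) {
    return false;  // someone is suspending us; issuing another request now risks a cycle
  }
  target.suspend_count.fetch_add(1, std::memory_order_relaxed);
  return true;
}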
@@ -1079,7 +1161,8 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() {
// daemons.
CHECK(thread->IsDaemon()) << *thread;
if (thread != self) {
- thread->IncrementSuspendCount(self);
+ bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
++daemons_left;
}
// We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
@@ -1179,10 +1262,11 @@ void ThreadList::Register(Thread* self) {
// SuspendAll requests.
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- if (suspend_all_count_ == 1) {
- self->IncrementSuspendCount(self);
- } else {
- DCHECK_EQ(suspend_all_count_, 0);
+  // Modify the suspend count in increments of 1 to maintain invariants in ModifySuspendCount.
+  // While this isn't particularly efficient, suspend counts are most commonly 0 or 1.
+ for (int delta = suspend_all_count_; delta > 0; delta--) {
+ bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
}
CHECK(!Contains(self));
list_.push_back(self);
@@ -1288,11 +1372,13 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (Thread* thread : list_) {
- thread->IncrementSuspendCount(self);
+ bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
+ DCHECK(suspended);
if (thread == self || thread->IsSuspended()) {
threads_to_visit.push_back(thread);
} else {
- thread->DecrementSuspendCount(self);
+ bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(resumed);
}
}
}
@@ -1307,7 +1393,8 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
{
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (Thread* thread : threads_to_visit) {
- thread->DecrementSuspendCount(self);
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
}
}
}
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index a17a11b3ce..29b0c52186 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -70,7 +70,7 @@ class ThreadList {
bool Resume(Thread* thread, SuspendReason reason = SuspendReason::kInternal)
REQUIRES(!Locks::thread_suspend_count_lock_) WARN_UNUSED;
- // Suspends all other threads and gets exclusive access to the mutator lock.
+ // Suspends all threads and gets exclusive access to the mutator lock.
  // If long_suspend is true, then other threads that try to suspend will never time out.
  // long_suspend is currently used for hprof since large heaps take a long time.
void SuspendAll(const char* cause, bool long_suspend = false)
@@ -81,8 +81,10 @@ class ThreadList {
// Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
// else null. The peer is used to identify the thread to avoid races with the thread terminating.
+  // If the suspension times out then *timed_out is set to true.
Thread* SuspendThreadByPeer(jobject peer,
- SuspendReason reason)
+ SuspendReason reason,
+ bool* timed_out)
REQUIRES(!Locks::mutator_lock_,
!Locks::thread_list_lock_,
!Locks::thread_suspend_count_lock_);
@@ -90,8 +92,8 @@ class ThreadList {
// Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
// thread on success else null. The thread id is used to identify the thread to avoid races with
// the thread terminating. Note that as thread ids are recycled this may not suspend the expected
- // thread, that may be terminating.
- Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason)
+  // thread, that may be terminating. If the suspension times out then *timed_out is set to true.
+ Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason, bool* timed_out)
REQUIRES(!Locks::mutator_lock_,
!Locks::thread_list_lock_,
!Locks::thread_suspend_count_lock_);
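// A hypothetical caller of the two declarations above, showing how the timed_out out-parameter
// distinguishes "no such thread" from "suspension timed out". The wrapper function below is
// illustrative; SuspendThreadByPeer(), Resume() and SuspendReason::kInternal are used as declared
// in this header, and details such as the caller's own thread state are omitted.
static void SketchSuspendAndInspect(ThreadList* thread_list, jobject peer) {
  bool timed_out = false;
  Thread* target = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal, &timed_out);
  if (target == nullptr) {
    // Either the peer no longer refers to a live thread, or (if timed_out is true) the target
    // failed to suspend before the timeout.
    return;
  }
  // ... inspect the suspended thread here ...
  bool resumed = thread_list->Resume(target, SuspendReason::kInternal);
  CHECK(resumed);
}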
@@ -111,9 +113,8 @@ class ThreadList {
// Running threads are not suspended but run the checkpoint inside of the suspend check. The
// return value includes already suspended threads for b/24191051. Runs or requests the
// callback, if non-null, inside the thread_list_lock critical section after determining the
- // runnable/suspended states of the threads. Does not wait for completion of the checkpoint
- // function in running threads. If the caller holds the mutator lock, then all instances of the
- // checkpoint function are run with the mutator lock.
+ // runnable/suspended states of the threads. Does not wait for completion of the callbacks in
+ // running threads.
size_t RunCheckpoint(Closure* checkpoint_function, Closure* callback = nullptr)
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
@@ -196,15 +197,12 @@ class ThreadList {
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
void SuspendAllInternal(Thread* self,
+ Thread* ignore1,
+ Thread* ignore2 = nullptr,
SuspendReason reason = SuspendReason::kInternal)
- REQUIRES(!Locks::thread_list_lock_,
- !Locks::thread_suspend_count_lock_,
- !Locks::mutator_lock_);
-
- // Wait for suspend barrier to reach zero. Return false on timeout.
- bool WaitForSuspendBarrier(AtomicInteger* barrier);
+ REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
- void AssertOtherThreadsAreSuspended(Thread* self)
+ void AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2 = nullptr)
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);
@@ -213,7 +211,6 @@ class ThreadList {
std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);
// Ongoing suspend all requests, used to ensure threads added to list_ respect SuspendAll.
- // The value is always either 0 or 1.
int suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_);
// Number of threads unregistering, ~ThreadList blocks until this hits 0.
@@ -221,7 +218,7 @@ class ThreadList {
// Thread suspend time histogram. Only modified when all the threads are suspended, so guarding
// by mutator lock ensures no thread can read when another thread is modifying it.
- Histogram<uint64_t> suspend_all_histogram_ GUARDED_BY(Locks::mutator_lock_);
+ Histogram<uint64_t> suspend_all_historam_ GUARDED_BY(Locks::mutator_lock_);
// Whether or not the current thread suspension is long.
bool long_suspend_;