Diffstat (limited to 'runtime/thread_list.cc')
-rw-r--r--  runtime/thread_list.cc  527
1 file changed, 220 insertions(+), 307 deletions(-)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 6b23c349ef..77e373d7de 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -62,10 +62,7 @@ namespace art {
using android::base::StringPrintf;
static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
-// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
-static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
-static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
-static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
+static constexpr useconds_t kThreadSuspendSleepUs = 1000;
// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
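
With the rewrite, the adaptive scheme (yield first, then exponentially longer sleeps) is gone: every retry path below simply sleeps for this fixed interval between attempts. A minimal sketch of the new pattern, with a hypothetical TrySuspend() standing in for the locked suspend-count update (not an ART API):

#include <unistd.h>

static constexpr useconds_t kThreadSuspendSleepUs = 1000;

bool TrySuspend();  // hypothetical: take the locks, raise the suspend count

void SuspendWithRetry() {
  // Fixed-interval retry: 1ms is long enough for a competing SuspendAll or
  // pending thread flip to finish, and short enough to keep the rare
  // contended case responsive. No yield phase, no exponential backoff.
  while (!TrySuspend()) {
    usleep(kThreadSuspendSleepUs);
  }
}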
@@ -74,7 +71,7 @@ static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;
ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
: suspend_all_count_(0),
unregistering_count_(0),
- suspend_all_historam_("suspend all histogram", 16, 64),
+ suspend_all_histogram_("suspend all histogram", 16, 64),
long_suspend_(false),
shut_down_(false),
thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
@@ -135,10 +132,10 @@ void ThreadList::DumpForSigQuit(std::ostream& os) {
{
ScopedObjectAccess soa(Thread::Current());
// Only print if we have samples.
- if (suspend_all_historam_.SampleSize() > 0) {
+ if (suspend_all_histogram_.SampleSize() > 0) {
Histogram<uint64_t>::CumulativeData data;
- suspend_all_historam_.CreateHistogram(&data);
- suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend.
+ suspend_all_histogram_.CreateHistogram(&data);
+ suspend_all_histogram_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend.
}
}
bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
@@ -260,11 +257,11 @@ void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
}
}
-void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
+void ThreadList::AssertOtherThreadsAreSuspended(Thread* self) {
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : list_) {
- if (thread != ignore1 && thread != ignore2) {
+ if (thread != self) {
CHECK(thread->IsSuspended())
<< "\nUnsuspended thread: <<" << *thread << "\n"
<< "self: <<" << *Thread::Current();
@@ -291,19 +288,6 @@ NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
}
#endif
-// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
-// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then
-// we use sched_yield instead of calling usleep.
-// Although there is the possibility, here and elsewhere, that usleep could return -1 and
-// errno = EINTR, there should be no problem if interrupted, so we do not check.
-static void ThreadSuspendSleep(useconds_t delay_us) {
- if (delay_us == 0) {
- sched_yield();
- } else {
- usleep(delay_us);
- }
-}
-
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
Thread* self = Thread::Current();
Locks::mutator_lock_->AssertNotExclusiveHeld(self);
@@ -326,9 +310,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
// This thread will run its checkpoint some time in the near future.
if (requested_suspend) {
// The suspend request is now unnecessary.
- bool updated =
- thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->DecrementSuspendCount(self);
requested_suspend = false;
}
break;
@@ -339,9 +321,9 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
continue;
}
if (!requested_suspend) {
- bool updated =
- thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ // Since we don't suspend other threads to run checkpoint_function, we claim this is
+ // safe even with flip_function set.
+ thread->IncrementSuspendCount(self);
requested_suspend = true;
if (thread->IsSuspended()) {
break;
@@ -376,8 +358,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
checkpoint_function->Run(thread);
{
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->DecrementSuspendCount(self);
}
}
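
In outline, running a checkpoint on a thread that is not runnable is now a pin/run/unpin sequence around the new count helpers. A toy model of that shape, with std:: types standing in for ART's Thread, MutexLock, and suspend-count machinery (the checkpoint-request fast path and barrier bookkeeping are deliberately elided):

#include <atomic>
#include <functional>
#include <mutex>
#include <thread>

std::mutex g_suspend_count_lock;  // stands in for thread_suspend_count_lock_

struct ToyThread {
  std::atomic<int> suspend_count{0};
  std::atomic<bool> suspended{false};
};

void RunCheckpointOn(ToyThread* t, const std::function<void(ToyThread*)>& fn) {
  {
    std::lock_guard<std::mutex> lock(g_suspend_count_lock);
    t->suspend_count.fetch_add(1);  // pin: t cannot become runnable
  }
  while (!t->suspended.load()) {
    std::this_thread::yield();      // wait for t to leave the runnable state
  }
  fn(t);                            // safe: t stays suspended while pinned
  {
    std::lock_guard<std::mutex> lock(g_suspend_count_lock);
    t->suspend_count.fetch_sub(1);  // unpin
  }
}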
@@ -516,14 +497,14 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
// ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
// pause.
const uint64_t suspend_start_time = NanoTime();
- SuspendAllInternal(self, self, nullptr);
+ SuspendAllInternal(self);
if (pause_listener != nullptr) {
pause_listener->StartPause();
}
// Run the flip callback for the collector.
Locks::mutator_lock_->ExclusiveLock(self);
- suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
+ suspend_all_histogram_.AdjustAndAddValue(NanoTime() - suspend_start_time);
flip_callback->Run(self);
Locks::mutator_lock_->ExclusiveUnlock(self);
collector->RegisterPause(NanoTime() - suspend_start_time);
@@ -554,8 +535,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
if ((state == ThreadState::kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
thread->GetSuspendCount() == 1) {
// The thread will resume right after the broadcast.
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->DecrementSuspendCount(self);
++runnable_thread_count;
} else {
other_threads.push_back(thread);
@@ -584,8 +564,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : other_threads) {
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->DecrementSuspendCount(self);
}
Thread::resume_cond_->Broadcast(self);
}
@@ -593,6 +572,32 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
return runnable_thread_count + other_threads.size() + 1; // +1 for self.
}
+bool ThreadList::WaitForSuspendBarrier(AtomicInteger* barrier) {
+#if ART_USE_FUTEXES
+ timespec wait_timeout;
+ InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
+#endif
+ while (true) {
+ int32_t cur_val = barrier->load(std::memory_order_acquire);
+ if (cur_val <= 0) {
+ CHECK_EQ(cur_val, 0);
+ return true;
+ }
+#if ART_USE_FUTEXES
+ if (futex(barrier->Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
+ != 0) {
+ if (errno == ETIMEDOUT) {
+ return false;
+ } else if (errno != EAGAIN && errno != EINTR) {
+ PLOG(FATAL) << "futex wait for suspend barrier failed";
+ }
+ }
+#endif
+ // Else spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on Linux,
+ // including all targets.
+ }
+}
+
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
Thread* self = Thread::Current();
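
WaitForSuspendBarrier leans on the futex contract: FUTEX_WAIT sleeps only if the word still holds the value the caller passes in, so re-reading the counter at the top of the loop and handing that snapshot to the kernel closes the window in which a final decrement-plus-wake could land between the load and the sleep (the kernel then fails with EAGAIN instead of sleeping). A self-contained sketch of the same countdown wait against the raw Linux syscall, illustrative only (ART goes through its own futex() wrapper and AtomicInteger):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>
#include <cstdint>
#include <ctime>

// Wait until *counter reaches 0. Wakers decrement the counter and then
// FUTEX_WAKE the address. Returns false on timeout.
bool WaitForZero(std::atomic<int32_t>* counter, const timespec* rel_timeout) {
  while (true) {
    int32_t cur = counter->load(std::memory_order_acquire);
    if (cur <= 0) {
      return true;
    }
    // Sleeps only while *counter still equals cur; otherwise fails EAGAIN.
    if (syscall(SYS_futex, reinterpret_cast<int32_t*>(counter),
                FUTEX_WAIT_PRIVATE, cur, rel_timeout, nullptr, 0) != 0 &&
        errno == ETIMEDOUT) {
      return false;
    }
    // Woken, EAGAIN, or EINTR: re-check the counter and maybe wait again.
  }
}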
@@ -605,7 +610,7 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
ScopedTrace trace("Suspending mutator threads");
const uint64_t start_time = NanoTime();
- SuspendAllInternal(self, self);
+ SuspendAllInternal(self);
// All threads are known to have suspended (but a thread may still own the mutator lock)
// Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
@@ -629,14 +634,14 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
const uint64_t end_time = NanoTime();
const uint64_t suspend_time = end_time - start_time;
- suspend_all_historam_.AdjustAndAddValue(suspend_time);
+ suspend_all_histogram_.AdjustAndAddValue(suspend_time);
if (suspend_time > kLongThreadSuspendThreshold) {
LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
}
if (kDebugLocking) {
// Debug check that all threads are suspended.
- AssertThreadsAreSuspended(self, self);
+ AssertOtherThreadsAreSuspended(self);
}
}
ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str());
@@ -649,10 +654,8 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
}
// Ensures all threads running Java suspend and that those not running Java don't start.
-void ThreadList::SuspendAllInternal(Thread* self,
- Thread* ignore1,
- Thread* ignore2,
- SuspendReason reason) {
+void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
+ const uint64_t start_time = NanoTime();
Locks::mutator_lock_->AssertNotExclusiveHeld(self);
Locks::thread_list_lock_->AssertNotHeld(self);
Locks::thread_suspend_count_lock_->AssertNotHeld(self);
@@ -670,85 +673,77 @@ void ThreadList::SuspendAllInternal(Thread* self,
// The atomic counter for number of threads that need to pass the barrier.
AtomicInteger pending_threads;
- uint32_t num_ignored = 0;
- if (ignore1 != nullptr) {
- ++num_ignored;
- }
- if (ignore2 != nullptr && ignore1 != ignore2) {
- ++num_ignored;
- }
- {
- MutexLock mu(self, *Locks::thread_list_lock_);
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- // Update global suspend all state for attaching threads.
- ++suspend_all_count_;
- pending_threads.store(list_.size() - num_ignored, std::memory_order_relaxed);
- // Increment everybody's suspend count (except those that should be ignored).
- for (const auto& thread : list_) {
- if (thread == ignore1 || thread == ignore2) {
- continue;
- }
- VLOG(threads) << "requesting thread suspend: " << *thread;
- bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
- DCHECK(updated);
-
- // Must install the pending_threads counter first, then check thread->IsSuspend() and clear
- // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
- // that can lead a thread to miss a call to PassActiveSuspendBarriers().
- if (thread->IsSuspended()) {
- // Only clear the counter for the current thread.
- thread->ClearSuspendBarrier(&pending_threads);
- pending_threads.fetch_sub(1, std::memory_order_seq_cst);
- }
- }
- }
-
- // Wait for the barrier to be passed by all runnable threads. This wait
- // is done with a timeout so that we can detect problems.
-#if ART_USE_FUTEXES
- timespec wait_timeout;
- InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
-#endif
- const uint64_t start_time = NanoTime();
while (true) {
- int32_t cur_val = pending_threads.load(std::memory_order_relaxed);
- if (LIKELY(cur_val > 0)) {
-#if ART_USE_FUTEXES
- if (futex(pending_threads.Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0)
- != 0) {
- if ((errno == EAGAIN) || (errno == EINTR)) {
- // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
- continue;
+ {
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ bool in_flip = false;
+ for (const auto& thread : list_) {
+ if (thread->GetFlipFunction() != nullptr) {
+ in_flip = true;
+ break;
}
- if (errno == ETIMEDOUT) {
- const uint64_t wait_time = NanoTime() - start_time;
- MutexLock mu(self, *Locks::thread_list_lock_);
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- std::ostringstream oss;
- for (const auto& thread : list_) {
- if (thread == ignore1 || thread == ignore2) {
- continue;
- }
- if (!thread->IsSuspended()) {
- oss << std::endl << "Thread not suspended: " << *thread;
+ }
+ if (LIKELY(suspend_all_count_ == 0
+ && (self == nullptr || self->GetSuspendCount() == 0)
+ && !in_flip)) {
+ // The above condition remains valid while we hold thread_suspend_count_lock_.
+ bool found_myself = false;
+ // Update global suspend all state for attaching threads.
+ ++suspend_all_count_;
+ pending_threads.store(list_.size() - (self == nullptr ? 0 : 1), std::memory_order_relaxed);
+ // Increment everybody else's suspend count.
+ for (const auto& thread : list_) {
+ if (thread == self) {
+ found_myself = true;
+ } else {
+ VLOG(threads) << "requesting thread suspend: " << *thread;
+ DCHECK_EQ(suspend_all_count_, 1);
+ thread->IncrementSuspendCount(self, &pending_threads, nullptr, reason);
+
+ // Must install the pending_threads counter first, then check thread->IsSuspended()
+ // and clear the counter. Otherwise there's a race with
+ // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call
+ // to PassActiveSuspendBarriers().
+ if (thread->IsSuspended()) {
+ // Effectively pass the barrier on behalf of the already suspended thread.
+ DCHECK_EQ(thread->tlsPtr_.active_suspendall_barrier, &pending_threads);
+ pending_threads.fetch_sub(1, std::memory_order_seq_cst);
+ thread->tlsPtr_.active_suspendall_barrier = nullptr;
+ if (!thread->HasActiveSuspendBarrier()) {
+ thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+ }
}
}
- LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
- << "Timed out waiting for threads to suspend, waited for "
- << PrettyDuration(wait_time)
- << oss.str();
- } else {
- PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
}
- } // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
-#else
- // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
- UNUSED(start_time);
-#endif
- } else {
- CHECK_EQ(cur_val, 0);
- break;
+ DCHECK(self == nullptr || found_myself);
+ break;
+ }
}
+ // The if (LIKELY ...) condition above didn't hold. This is a bad time to initiate a suspend.
+ // Either the suspendall_barrier is already in use, or proceeding at this time risks deadlock.
+ // See b/31683379 for an explanation of the thread flip condition. Note that in the event of a
+ // competing Suspend or SuspendAll, we are about to be suspended anyway. We hold no locks,
+ // so it's safe to sleep and retry.
+ VLOG(threads) << "SuspendAll is sleeping";
+ usleep(kThreadSuspendSleepUs);
+ // We're already not runnable, so an attempt to suspend us should succeed.
+ }
+
+ if (!WaitForSuspendBarrier(&pending_threads)) {
+ const uint64_t wait_time = NanoTime() - start_time;
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ std::ostringstream oss;
+ for (const auto& thread : list_) {
+ if (thread != self && !thread->IsSuspended()) {
+ oss << std::endl << "Thread not suspended: " << *thread;
+ }
+ }
+ LOG(::android::base::FATAL)
+ << "Timed out waiting for threads to suspend, waited for "
+ << PrettyDuration(wait_time)
+ << oss.str();
}
}
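
The ordering inside the loop above (install the pending_threads barrier, then test IsSuspended(), then pay the already-suspended thread's unit on its behalf) is what closes the race with Thread::TransitionFromRunnableToSuspended(). A toy model of just that hand-off, using assumed names rather than ART types:

#include <atomic>
#include <cstdint>

struct ToyTarget {
  std::atomic<bool> suspended{false};
  std::atomic<std::atomic<int32_t>*> barrier{nullptr};
};

// The initiator charges each target one unit of 'pending'. A target that is
// already suspended will never run PassActiveSuspendBarriers(), so the
// initiator pays that unit itself.
void ChargeTarget(ToyTarget* t, std::atomic<int32_t>* pending) {
  t->barrier.store(pending, std::memory_order_seq_cst);  // 1. install first
  if (t->suspended.load(std::memory_order_seq_cst)) {    // 2. then check
    pending->fetch_sub(1, std::memory_order_seq_cst);    // 3. pay its unit
    t->barrier.store(nullptr, std::memory_order_seq_cst);
  }
  // Checking 'suspended' before installing would leave a window where the
  // target suspends in between, never sees the barrier, and the initiator
  // waits forever on a unit nobody will pay.
}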
@@ -767,7 +762,7 @@ void ThreadList::ResumeAll() {
if (kDebugLocking) {
// Debug check that all threads are suspended.
- AssertThreadsAreSuspended(self, self);
+ AssertOtherThreadsAreSuspended(self);
}
long_suspend_ = false;
@@ -780,11 +775,9 @@ void ThreadList::ResumeAll() {
--suspend_all_count_;
// Decrement the suspend counts for all threads.
for (const auto& thread : list_) {
- if (thread == self) {
- continue;
+ if (thread != self) {
+ thread->DecrementSuspendCount(self);
}
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
}
// Broadcast a notification to all suspended threads, some or all of
@@ -829,11 +822,7 @@ bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
<< ") thread not within thread list";
return false;
}
- if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
- LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
- << ") could not modify suspend count.";
- return false;
- }
+ thread->DecrementSuspendCount(self, reason);
}
{
@@ -863,40 +852,19 @@ static void ThreadSuspendByPeerWarning(Thread* self,
}
}
-Thread* ThreadList::SuspendThreadByPeer(jobject peer,
- SuspendReason reason,
- bool* timed_out) {
- bool request_suspension = true;
- const uint64_t start_time = NanoTime();
- int self_suspend_count = 0;
- useconds_t sleep_us = kThreadSuspendInitialSleepUs;
- *timed_out = false;
+Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) {
+ bool is_suspended = false;
Thread* const self = Thread::Current();
- Thread* suspended_thread = nullptr;
VLOG(threads) << "SuspendThreadByPeer starting";
+ Thread* thread;
+ WrappedSuspend1Barrier wrapped_barrier{};
while (true) {
- Thread* thread;
{
- // Note: this will transition to runnable and potentially suspend. We ensure only one thread
- // is requesting another suspend, to avoid deadlock, by requiring this function be called
- // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
- // than request thread suspension, to avoid potential cycles in threads requesting each other
- // suspend.
+ // Note: this will transition to runnable and potentially suspend.
ScopedObjectAccess soa(self);
MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
thread = Thread::FromManagedThread(soa, peer);
if (thread == nullptr) {
- if (suspended_thread != nullptr) {
- MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
- // If we incremented the suspend count but the thread reset its peer, we need to
- // re-decrement it since it is shutting down and may deadlock the runtime in
- // ThreadList::WaitForOtherNonDaemonThreadsToExit.
- bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
- -1,
- nullptr,
- reason);
- DCHECK(updated);
- }
ThreadSuspendByPeerWarning(self,
::android::base::WARNING,
"No such thread for suspend",
@@ -904,188 +872,138 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer,
return nullptr;
}
if (!Contains(thread)) {
- CHECK(suspended_thread == nullptr);
VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
<< reinterpret_cast<void*>(thread);
return nullptr;
}
+ // IsSuspended on the current thread will fail as the current thread is changed into
+ // Runnable above. As the suspend count is now raised if this is the current thread
+ // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+ // to just explicitly handle the current thread in the callers to this code.
+ CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
{
MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
- if (request_suspension) {
- if (self->GetSuspendCount() > 0) {
- // We hold the suspend count lock but another thread is trying to suspend us. Its not
- // safe to try to suspend another thread in case we get a cycle. Start the loop again
- // which will allow this thread to be suspended.
- ++self_suspend_count;
- continue;
- }
- CHECK(suspended_thread == nullptr);
- suspended_thread = thread;
- bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
- DCHECK(updated);
- request_suspension = false;
- } else {
- // If the caller isn't requesting suspension, a suspension should have already occurred.
- CHECK_GT(thread->GetSuspendCount(), 0);
- }
- // IsSuspended on the current thread will fail as the current thread is changed into
- // Runnable above. As the suspend count is now raised if this is the current thread
- // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
- // to just explicitly handle the current thread in the callers to this code.
- CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
- // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
- // count, or else we've waited and it has self suspended) or is the current thread, we're
- // done.
- if (thread->IsSuspended()) {
- VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
- if (ATraceEnabled()) {
- std::string name;
- thread->GetThreadName(name);
- ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
- peer).c_str());
- }
- return thread;
- }
- const uint64_t total_delay = NanoTime() - start_time;
- if (total_delay >= thread_suspend_timeout_ns_) {
- if (suspended_thread == nullptr) {
- ThreadSuspendByPeerWarning(self,
- ::android::base::FATAL,
- "Failed to issue suspend request",
- peer);
- } else {
- CHECK_EQ(suspended_thread, thread);
- LOG(WARNING) << "Suspended thread state_and_flags: "
- << suspended_thread->StateAndFlagsAsHexString()
- << ", self_suspend_count = " << self_suspend_count;
- // Explicitly release thread_suspend_count_lock_; we haven't held it for long, so
- // seeing threads blocked on it is not informative.
- Locks::thread_suspend_count_lock_->Unlock(self);
- ThreadSuspendByPeerWarning(self,
- ::android::base::FATAL,
- "Thread suspension timed out",
- peer);
+ if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
+ thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
+ if (thread->IsSuspended()) {
+ // See the discussion in mutator_gc_coord.md for the race here.
+ thread->RemoveFirstSuspend1Barrier();
+ if (!thread->HasActiveSuspendBarrier()) {
+ thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+ }
+ is_suspended = true;
}
- UNREACHABLE();
- } else if (sleep_us == 0 &&
- total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
- // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
- // excessive CPU usage.
- sleep_us = kThreadSuspendMaxYieldUs / 2;
+ DCHECK_GT(thread->GetSuspendCount(), 0);
+ break;
}
+ // Else we either hold the suspend count lock but another thread is trying to suspend us,
+ // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
+ // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
+ // case, start the loop again, which will allow this thread to be suspended.
}
- // Release locks and come out of runnable state.
}
- VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
- ThreadSuspendSleep(sleep_us);
- // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at
- // all if possible. This shouldn't be an issue since time to suspend should always be small.
- sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
+ // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
+ VLOG(threads) << "SuspendThreadByPeer is sleeping";
+ usleep(kThreadSuspendSleepUs);
+ }
+ // Now wait for target to decrement suspend barrier.
+ if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
+ // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
+ VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
+ if (ATraceEnabled()) {
+ std::string name;
+ thread->GetThreadName(name);
+ ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
+ peer).c_str());
+ }
+ return thread;
+ } else {
+ LOG(WARNING) << "Suspended thread state_and_flags: "
+ << thread->StateAndFlagsAsHexString();
+ // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe
+ // without additional cleanup.
+ ThreadSuspendByPeerWarning(self,
+ ::android::base::FATAL,
+ "SuspendThreadByPeer timed out",
+ peer);
+ UNREACHABLE();
}
}
+
static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
const char* message,
uint32_t thread_id) {
LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}
-Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
- SuspendReason reason,
- bool* timed_out) {
- const uint64_t start_time = NanoTime();
- useconds_t sleep_us = kThreadSuspendInitialSleepUs;
- *timed_out = false;
- Thread* suspended_thread = nullptr;
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason) {
+ bool is_suspended = false;
Thread* const self = Thread::Current();
CHECK_NE(thread_id, kInvalidThreadId);
VLOG(threads) << "SuspendThreadByThreadId starting";
+ Thread* thread;
+ WrappedSuspend1Barrier wrapped_barrier{};
while (true) {
{
- // Note: this will transition to runnable and potentially suspend. We ensure only one thread
- // is requesting another suspend, to avoid deadlock, by requiring this function be called
- // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
- // than request thread suspension, to avoid potential cycles in threads requesting each other
- // suspend.
+ // Note: this will transition to runnable and potentially suspend.
ScopedObjectAccess soa(self);
MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
- Thread* thread = nullptr;
- for (const auto& it : list_) {
- if (it->GetThreadId() == thread_id) {
- thread = it;
- break;
- }
- }
+ thread = FindThreadByThreadId(thread_id);
if (thread == nullptr) {
- CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
- << " no longer in thread list";
// There's a race in inflating a lock and the owner giving up ownership and then dying.
ThreadSuspendByThreadIdWarning(::android::base::WARNING,
"No such thread id for suspend",
thread_id);
return nullptr;
}
- VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
DCHECK(Contains(thread));
+ CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+ VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
{
MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
- if (suspended_thread == nullptr) {
- if (self->GetSuspendCount() > 0) {
- // We hold the suspend count lock but another thread is trying to suspend us. Its not
- // safe to try to suspend another thread in case we get a cycle. Start the loop again
- // which will allow this thread to be suspended.
- continue;
- }
- bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
- DCHECK(updated);
- suspended_thread = thread;
- } else {
- CHECK_EQ(suspended_thread, thread);
- // If the caller isn't requesting suspension, a suspension should have already occurred.
- CHECK_GT(thread->GetSuspendCount(), 0);
- }
- // IsSuspended on the current thread will fail as the current thread is changed into
- // Runnable above. As the suspend count is now raised if this is the current thread
- // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
- // to just explicitly handle the current thread in the callers to this code.
- CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
- // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
- // count, or else we've waited and it has self suspended) or is the current thread, we're
- // done.
- if (thread->IsSuspended()) {
- if (ATraceEnabled()) {
- std::string name;
- thread->GetThreadName(name);
- ATraceBegin(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
- name.c_str(), thread_id).c_str());
- }
- VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
- return thread;
- }
- const uint64_t total_delay = NanoTime() - start_time;
- if (total_delay >= thread_suspend_timeout_ns_) {
- ThreadSuspendByThreadIdWarning(::android::base::WARNING,
- "Thread suspension timed out",
- thread_id);
- if (suspended_thread != nullptr) {
- bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
- DCHECK(updated);
+ if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) {
+ thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
+ if (thread->IsSuspended()) {
+ // See the discussion in mutator_gc_coord.md for the race here.
+ thread->RemoveFirstSuspend1Barrier();
+ if (!thread->HasActiveSuspendBarrier()) {
+ thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+ }
+ is_suspended = true;
}
- *timed_out = true;
- return nullptr;
- } else if (sleep_us == 0 &&
- total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
- // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
- // excessive CPU usage.
- sleep_us = kThreadSuspendMaxYieldUs / 2;
+ DCHECK_GT(thread->GetSuspendCount(), 0);
+ break;
}
+ // Else we either hold the suspend count lock but another thread is trying to suspend us,
+ // making it unsafe to try to suspend another thread in case we get a cycle. Or we're
+ // currently in the middle of a flip, and could otherwise encounter b/31683379. In either
+ // case, start the loop again, which will allow this thread to be suspended.
}
- // Release locks and come out of runnable state.
}
- VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
- ThreadSuspendSleep(sleep_us);
- sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
+ // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
+ VLOG(threads) << "SuspendThreadByThreadId is sleeping";
+ usleep(kThreadSuspendSleepUs);
+ }
+ // Now wait for target to decrement suspend barrier.
+ if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) {
+ // wrapped_barrier.barrier_ has been decremented and will no longer be accessed.
+ VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
+ if (ATraceEnabled()) {
+ std::string name;
+ thread->GetThreadName(name);
+ ATraceBegin(StringPrintf("SuspendThreadByThreadId suspended %s for id=%d",
+ name.c_str(), thread_id).c_str());
+ }
+ return thread;
+ } else {
+ // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe
+ // without additional cleanup.
+ ThreadSuspendByThreadIdWarning(::android::base::FATAL,
+ "SuspendThreadByThreadId timed out",
+ thread_id);
+ UNREACHABLE();
}
}
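
Both single-thread suspend paths now share the same rendezvous: a count-of-one barrier living on the initiator's stack, decremented by the target when it actually suspends. That stack residency is also why both timeout paths are FATAL rather than an error return: the target may still hold a pointer to the stack object, so unwinding past it would be a use-after-return. A simplified sketch with assumed types (ART's WrappedSuspend1Barrier additionally links into a per-thread list, and the real wait is the futex wait with timeout shown earlier):

#include <atomic>
#include <cstdint>

struct Suspend1Barrier {
  std::atomic<int32_t> barrier{1};  // exactly one unit: the target thread
};

// Initiator: stack-allocate, register with the target under the suspend
// locks, then wait for the unit to be paid. Spinning keeps the sketch
// minimal; the real code futex-waits with thread_suspend_timeout_ns_.
void InitiatorWait(Suspend1Barrier* b) {
  while (b->barrier.load(std::memory_order_acquire) != 0) {
  }
}

// Target side, run when it really suspends. If the initiator observed the
// target already suspended, it performs this step itself instead (the
// RemoveFirstSuspend1Barrier() path above).
void TargetPass(Suspend1Barrier* b) {
  b->barrier.fetch_sub(1, std::memory_order_release);
  // The real code also FUTEX_WAKEs the barrier address here.
}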
@@ -1161,8 +1079,7 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() {
// daemons.
CHECK(thread->IsDaemon()) << *thread;
if (thread != self) {
- bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->IncrementSuspendCount(self);
++daemons_left;
}
// We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
@@ -1262,11 +1179,10 @@ void ThreadList::Register(Thread* self) {
// SuspendAll requests.
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
- // this isn't particularly efficient the suspend counts are most commonly 0 or 1.
- for (int delta = suspend_all_count_; delta > 0; delta--) {
- bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ if (suspend_all_count_ == 1) {
+ self->IncrementSuspendCount(self);
+ } else {
+ DCHECK_EQ(suspend_all_count_, 0);
}
CHECK(!Contains(self));
list_.push_back(self);
@@ -1372,13 +1288,11 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (Thread* thread : list_) {
- bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
- DCHECK(suspended);
+ thread->IncrementSuspendCount(self);
if (thread == self || thread->IsSuspended()) {
threads_to_visit.push_back(thread);
} else {
- bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(resumed);
+ thread->DecrementSuspendCount(self);
}
}
}
@@ -1393,8 +1307,7 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
{
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (Thread* thread : threads_to_visit) {
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ thread->DecrementSuspendCount(self);
}
}
}