diff options
Diffstat (limited to 'runtime/thread_list.cc')
-rw-r--r-- | runtime/thread_list.cc | 527 |
1 files changed, 220 insertions, 307 deletions
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 6b23c349ef..77e373d7de 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -62,10 +62,7 @@ namespace art { using android::base::StringPrintf; static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5); -// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time. -static constexpr useconds_t kThreadSuspendInitialSleepUs = 0; -static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000; -static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000; +static constexpr useconds_t kThreadSuspendSleepUs = 1000; // Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for // some history. @@ -74,7 +71,7 @@ static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true; ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns) : suspend_all_count_(0), unregistering_count_(0), - suspend_all_historam_("suspend all histogram", 16, 64), + suspend_all_histogram_("suspend all histogram", 16, 64), long_suspend_(false), shut_down_(false), thread_suspend_timeout_ns_(thread_suspend_timeout_ns), @@ -135,10 +132,10 @@ void ThreadList::DumpForSigQuit(std::ostream& os) { { ScopedObjectAccess soa(Thread::Current()); // Only print if we have samples. - if (suspend_all_historam_.SampleSize() > 0) { + if (suspend_all_histogram_.SampleSize() > 0) { Histogram<uint64_t>::CumulativeData data; - suspend_all_historam_.CreateHistogram(&data); - suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend. + suspend_all_histogram_.CreateHistogram(&data); + suspend_all_histogram_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend. } } bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit(); @@ -260,11 +257,11 @@ void ThreadList::Dump(std::ostream& os, bool dump_native_stack) { } } -void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) { +void ThreadList::AssertOtherThreadsAreSuspended(Thread* self) { MutexLock mu(self, *Locks::thread_list_lock_); MutexLock mu2(self, *Locks::thread_suspend_count_lock_); for (const auto& thread : list_) { - if (thread != ignore1 && thread != ignore2) { + if (thread != self) { CHECK(thread->IsSuspended()) << "\nUnsuspended thread: <<" << *thread << "\n" << "self: <<" << *Thread::Current(); @@ -291,19 +288,6 @@ NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() { } #endif -// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an -// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then -// we use sched_yield instead of calling usleep. -// Although there is the possibility, here and elsewhere, that usleep could return -1 and -// errno = EINTR, there should be no problem if interrupted, so we do not check. -static void ThreadSuspendSleep(useconds_t delay_us) { - if (delay_us == 0) { - sched_yield(); - } else { - usleep(delay_us); - } -} - size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) { Thread* self = Thread::Current(); Locks::mutator_lock_->AssertNotExclusiveHeld(self); @@ -326,9 +310,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback // This thread will run its checkpoint some time in the near future. if (requested_suspend) { // The suspend request is now unnecessary. - bool updated = - thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->DecrementSuspendCount(self); requested_suspend = false; } break; @@ -339,9 +321,9 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback continue; } if (!requested_suspend) { - bool updated = - thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + // Since we don't suspend other threads to run checkpoint_function, we claim this is + // safe even with flip_function set. + thread->IncrementSuspendCount(self); requested_suspend = true; if (thread->IsSuspended()) { break; @@ -376,8 +358,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback checkpoint_function->Run(thread); { MutexLock mu2(self, *Locks::thread_suspend_count_lock_); - bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->DecrementSuspendCount(self); } } @@ -516,14 +497,14 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the // pause. const uint64_t suspend_start_time = NanoTime(); - SuspendAllInternal(self, self, nullptr); + SuspendAllInternal(self); if (pause_listener != nullptr) { pause_listener->StartPause(); } // Run the flip callback for the collector. Locks::mutator_lock_->ExclusiveLock(self); - suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time); + suspend_all_histogram_.AdjustAndAddValue(NanoTime() - suspend_start_time); flip_callback->Run(self); Locks::mutator_lock_->ExclusiveUnlock(self); collector->RegisterPause(NanoTime() - suspend_start_time); @@ -554,8 +535,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, if ((state == ThreadState::kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) && thread->GetSuspendCount() == 1) { // The thread will resume right after the broadcast. - bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->DecrementSuspendCount(self); ++runnable_thread_count; } else { other_threads.push_back(thread); @@ -584,8 +564,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings()); MutexLock mu2(self, *Locks::thread_suspend_count_lock_); for (const auto& thread : other_threads) { - bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->DecrementSuspendCount(self); } Thread::resume_cond_->Broadcast(self); } @@ -593,6 +572,32 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, return runnable_thread_count + other_threads.size() + 1; // +1 for self. } +bool ThreadList::WaitForSuspendBarrier(AtomicInteger* barrier) { +#if ART_USE_FUTEXES + timespec wait_timeout; + InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout); +#endif + while (true) { + int32_t cur_val = barrier->load(std::memory_order_acquire); + if (cur_val <= 0) { + CHECK_EQ(cur_val, 0); + return true; + } +#if ART_USE_FUTEXES + if (futex(barrier->Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0) + != 0) { + if (errno == ETIMEDOUT) { + return false; + } else if (errno != EAGAIN && errno != EINTR) { + PLOG(FATAL) << "futex wait for suspend barrier failed"; + } + } +#endif + // Else spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on Linux, + // including all targets. + } +} + void ThreadList::SuspendAll(const char* cause, bool long_suspend) { Thread* self = Thread::Current(); @@ -605,7 +610,7 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) { ScopedTrace trace("Suspending mutator threads"); const uint64_t start_time = NanoTime(); - SuspendAllInternal(self, self); + SuspendAllInternal(self); // All threads are known to have suspended (but a thread may still own the mutator lock) // Make sure this thread grabs exclusive access to the mutator lock and its protected data. #if HAVE_TIMED_RWLOCK @@ -629,14 +634,14 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) { const uint64_t end_time = NanoTime(); const uint64_t suspend_time = end_time - start_time; - suspend_all_historam_.AdjustAndAddValue(suspend_time); + suspend_all_histogram_.AdjustAndAddValue(suspend_time); if (suspend_time > kLongThreadSuspendThreshold) { LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time); } if (kDebugLocking) { // Debug check that all threads are suspended. - AssertThreadsAreSuspended(self, self); + AssertOtherThreadsAreSuspended(self); } } ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str()); @@ -649,10 +654,8 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) { } // Ensures all threads running Java suspend and that those not running Java don't start. -void ThreadList::SuspendAllInternal(Thread* self, - Thread* ignore1, - Thread* ignore2, - SuspendReason reason) { +void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) { + const uint64_t start_time = NanoTime(); Locks::mutator_lock_->AssertNotExclusiveHeld(self); Locks::thread_list_lock_->AssertNotHeld(self); Locks::thread_suspend_count_lock_->AssertNotHeld(self); @@ -670,85 +673,77 @@ void ThreadList::SuspendAllInternal(Thread* self, // The atomic counter for number of threads that need to pass the barrier. AtomicInteger pending_threads; - uint32_t num_ignored = 0; - if (ignore1 != nullptr) { - ++num_ignored; - } - if (ignore2 != nullptr && ignore1 != ignore2) { - ++num_ignored; - } - { - MutexLock mu(self, *Locks::thread_list_lock_); - MutexLock mu2(self, *Locks::thread_suspend_count_lock_); - // Update global suspend all state for attaching threads. - ++suspend_all_count_; - pending_threads.store(list_.size() - num_ignored, std::memory_order_relaxed); - // Increment everybody's suspend count (except those that should be ignored). - for (const auto& thread : list_) { - if (thread == ignore1 || thread == ignore2) { - continue; - } - VLOG(threads) << "requesting thread suspend: " << *thread; - bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason); - DCHECK(updated); - - // Must install the pending_threads counter first, then check thread->IsSuspend() and clear - // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended() - // that can lead a thread to miss a call to PassActiveSuspendBarriers(). - if (thread->IsSuspended()) { - // Only clear the counter for the current thread. - thread->ClearSuspendBarrier(&pending_threads); - pending_threads.fetch_sub(1, std::memory_order_seq_cst); - } - } - } - - // Wait for the barrier to be passed by all runnable threads. This wait - // is done with a timeout so that we can detect problems. -#if ART_USE_FUTEXES - timespec wait_timeout; - InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout); -#endif - const uint64_t start_time = NanoTime(); while (true) { - int32_t cur_val = pending_threads.load(std::memory_order_relaxed); - if (LIKELY(cur_val > 0)) { -#if ART_USE_FUTEXES - if (futex(pending_threads.Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0) - != 0) { - if ((errno == EAGAIN) || (errno == EINTR)) { - // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning. - continue; + { + MutexLock mu(self, *Locks::thread_list_lock_); + MutexLock mu2(self, *Locks::thread_suspend_count_lock_); + bool in_flip = false; + for (const auto& thread : list_) { + if (thread->GetFlipFunction() != nullptr) { + in_flip = true; + break; } - if (errno == ETIMEDOUT) { - const uint64_t wait_time = NanoTime() - start_time; - MutexLock mu(self, *Locks::thread_list_lock_); - MutexLock mu2(self, *Locks::thread_suspend_count_lock_); - std::ostringstream oss; - for (const auto& thread : list_) { - if (thread == ignore1 || thread == ignore2) { - continue; - } - if (!thread->IsSuspended()) { - oss << std::endl << "Thread not suspended: " << *thread; + } + if (LIKELY(suspend_all_count_ == 0 + && (self == nullptr || self->GetSuspendCount() == 0) + && !in_flip)) { + // The above condition remains valid while we hold thread_suspend_count_lock_ . + bool found_myself = false; + // Update global suspend all state for attaching threads. + ++suspend_all_count_; + pending_threads.store(list_.size() - (self == nullptr ? 0 : 1), std::memory_order_relaxed); + // Increment everybody else's suspend count. + for (const auto& thread : list_) { + if (thread == self) { + found_myself = true; + } else { + VLOG(threads) << "requesting thread suspend: " << *thread; + DCHECK_EQ(suspend_all_count_, 1); + thread->IncrementSuspendCount(self, &pending_threads, nullptr, reason); + + // Must install the pending_threads counter first, then check thread->IsSuspended() + // and clear the counter. Otherwise there's a race with + // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call + // to PassActiveSuspendBarriers(). + if (thread->IsSuspended()) { + // Effectively pass the barrier on behalf of the already suspended thread. + DCHECK_EQ(thread->tlsPtr_.active_suspendall_barrier, &pending_threads); + pending_threads.fetch_sub(1, std::memory_order_seq_cst); + thread->tlsPtr_.active_suspendall_barrier = nullptr; + if (!thread->HasActiveSuspendBarrier()) { + thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier); + } } } - LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR) - << "Timed out waiting for threads to suspend, waited for " - << PrettyDuration(wait_time) - << oss.str(); - } else { - PLOG(FATAL) << "futex wait failed for SuspendAllInternal()"; } - } // else re-check pending_threads in the next iteration (this may be a spurious wake-up). -#else - // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set. - UNUSED(start_time); -#endif - } else { - CHECK_EQ(cur_val, 0); - break; + DCHECK(self == nullptr || found_myself); + break; + } } + // The if (LIKELY ...) condition above didn't hold. This is a bad time to initiate a suspend. + // Either the suspendall_barrier is already in use, or proceeding at this time risks deadlock. + // See b/31683379 for an explanation of the thread flip condition. Note that in the event of a + // competing Suspend or SuspendAll, we are about to be suspended anyway. We hold no locks, + // so it's safe to sleep and retry. + VLOG(threads) << "SuspendAll is sleeping"; + usleep(kThreadSuspendSleepUs); + // We're already not runnable, so an attempt to suspend us should succeed. + } + + if (!WaitForSuspendBarrier(&pending_threads)) { + const uint64_t wait_time = NanoTime() - start_time; + MutexLock mu(self, *Locks::thread_list_lock_); + MutexLock mu2(self, *Locks::thread_suspend_count_lock_); + std::ostringstream oss; + for (const auto& thread : list_) { + if (thread != self && !thread->IsSuspended()) { + oss << std::endl << "Thread not suspended: " << *thread; + } + } + LOG(::android::base::FATAL) + << "Timed out waiting for threads to suspend, waited for " + << PrettyDuration(wait_time) + << oss.str(); } } @@ -767,7 +762,7 @@ void ThreadList::ResumeAll() { if (kDebugLocking) { // Debug check that all threads are suspended. - AssertThreadsAreSuspended(self, self); + AssertOtherThreadsAreSuspended(self); } long_suspend_ = false; @@ -780,11 +775,9 @@ void ThreadList::ResumeAll() { --suspend_all_count_; // Decrement the suspend counts for all threads. for (const auto& thread : list_) { - if (thread == self) { - continue; + if (thread != self) { + thread->DecrementSuspendCount(self); } - bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); } // Broadcast a notification to all suspended threads, some or all of @@ -829,11 +822,7 @@ bool ThreadList::Resume(Thread* thread, SuspendReason reason) { << ") thread not within thread list"; return false; } - if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) { - LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread) - << ") could not modify suspend count."; - return false; - } + thread->DecrementSuspendCount(self, reason); } { @@ -863,40 +852,19 @@ static void ThreadSuspendByPeerWarning(Thread* self, } } -Thread* ThreadList::SuspendThreadByPeer(jobject peer, - SuspendReason reason, - bool* timed_out) { - bool request_suspension = true; - const uint64_t start_time = NanoTime(); - int self_suspend_count = 0; - useconds_t sleep_us = kThreadSuspendInitialSleepUs; - *timed_out = false; +Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) { + bool is_suspended = false; Thread* const self = Thread::Current(); - Thread* suspended_thread = nullptr; VLOG(threads) << "SuspendThreadByPeer starting"; + Thread* thread; + WrappedSuspend1Barrier wrapped_barrier{}; while (true) { - Thread* thread; { - // Note: this will transition to runnable and potentially suspend. We ensure only one thread - // is requesting another suspend, to avoid deadlock, by requiring this function be called - // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather - // than request thread suspension, to avoid potential cycles in threads requesting each other - // suspend. + // Note: this will transition to runnable and potentially suspend. ScopedObjectAccess soa(self); MutexLock thread_list_mu(self, *Locks::thread_list_lock_); thread = Thread::FromManagedThread(soa, peer); if (thread == nullptr) { - if (suspended_thread != nullptr) { - MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_); - // If we incremented the suspend count but the thread reset its peer, we need to - // re-decrement it since it is shutting down and may deadlock the runtime in - // ThreadList::WaitForOtherNonDaemonThreadsToExit. - bool updated = suspended_thread->ModifySuspendCount(soa.Self(), - -1, - nullptr, - reason); - DCHECK(updated); - } ThreadSuspendByPeerWarning(self, ::android::base::WARNING, "No such thread for suspend", @@ -904,188 +872,138 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, return nullptr; } if (!Contains(thread)) { - CHECK(suspended_thread == nullptr); VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: " << reinterpret_cast<void*>(thread); return nullptr; } + // IsSuspended on the current thread will fail as the current thread is changed into + // Runnable above. As the suspend count is now raised if this is the current thread + // it will self suspend on transition to Runnable, making it hard to work with. It's simpler + // to just explicitly handle the current thread in the callers to this code. + CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger"; VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread; { MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_); - if (request_suspension) { - if (self->GetSuspendCount() > 0) { - // We hold the suspend count lock but another thread is trying to suspend us. Its not - // safe to try to suspend another thread in case we get a cycle. Start the loop again - // which will allow this thread to be suspended. - ++self_suspend_count; - continue; - } - CHECK(suspended_thread == nullptr); - suspended_thread = thread; - bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason); - DCHECK(updated); - request_suspension = false; - } else { - // If the caller isn't requesting suspension, a suspension should have already occurred. - CHECK_GT(thread->GetSuspendCount(), 0); - } - // IsSuspended on the current thread will fail as the current thread is changed into - // Runnable above. As the suspend count is now raised if this is the current thread - // it will self suspend on transition to Runnable, making it hard to work with. It's simpler - // to just explicitly handle the current thread in the callers to this code. - CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger"; - // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend - // count, or else we've waited and it has self suspended) or is the current thread, we're - // done. - if (thread->IsSuspended()) { - VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread; - if (ATraceEnabled()) { - std::string name; - thread->GetThreadName(name); - ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(), - peer).c_str()); - } - return thread; - } - const uint64_t total_delay = NanoTime() - start_time; - if (total_delay >= thread_suspend_timeout_ns_) { - if (suspended_thread == nullptr) { - ThreadSuspendByPeerWarning(self, - ::android::base::FATAL, - "Failed to issue suspend request", - peer); - } else { - CHECK_EQ(suspended_thread, thread); - LOG(WARNING) << "Suspended thread state_and_flags: " - << suspended_thread->StateAndFlagsAsHexString() - << ", self_suspend_count = " << self_suspend_count; - // Explicitly release thread_suspend_count_lock_; we haven't held it for long, so - // seeing threads blocked on it is not informative. - Locks::thread_suspend_count_lock_->Unlock(self); - ThreadSuspendByPeerWarning(self, - ::android::base::FATAL, - "Thread suspension timed out", - peer); + if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) { + thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason); + if (thread->IsSuspended()) { + // See the discussion in mutator_gc_coord.md for the race here. + thread->RemoveFirstSuspend1Barrier(); + if (!thread->HasActiveSuspendBarrier()) { + thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier); + } + is_suspended = true; } - UNREACHABLE(); - } else if (sleep_us == 0 && - total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { - // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent - // excessive CPU usage. - sleep_us = kThreadSuspendMaxYieldUs / 2; + DCHECK_GT(thread->GetSuspendCount(), 0); + break; } + // Else we either hold the suspend count lock but another thread is trying to suspend us, + // making it unsafe to try to suspend another thread in case we get a cycle. Or we're + // currently in the middle of a flip, and could otherwise encounter b/31683379. In either + // case, start the loop again, which will allow this thread to be suspended. } - // Release locks and come out of runnable state. } - VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend"; - ThreadSuspendSleep(sleep_us); - // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at - // all if possible. This shouldn't be an issue since time to suspend should always be small. - sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs); + // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry. + VLOG(threads) << "SuspendThreadByPeer is sleeping"; + usleep(kThreadSuspendSleepUs); + } + // Now wait for target to decrement suspend barrier. + if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) { + // wrapped_barrier.barrier_ has been decremented and will no longer be accessed. + VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread; + if (ATraceEnabled()) { + std::string name; + thread->GetThreadName(name); + ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(), + peer).c_str()); + } + return thread; + } else { + LOG(WARNING) << "Suspended thread state_and_flags: " + << thread->StateAndFlagsAsHexString(); + // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe + // without additional cleanup. + ThreadSuspendByPeerWarning(self, + ::android::base::FATAL, + "SuspendThreadByPeer timed out", + peer); + UNREACHABLE(); } } + static void ThreadSuspendByThreadIdWarning(LogSeverity severity, const char* message, uint32_t thread_id) { LOG(severity) << StringPrintf("%s: %d", message, thread_id); } -Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, - SuspendReason reason, - bool* timed_out) { - const uint64_t start_time = NanoTime(); - useconds_t sleep_us = kThreadSuspendInitialSleepUs; - *timed_out = false; - Thread* suspended_thread = nullptr; +Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason) { + bool is_suspended = false; Thread* const self = Thread::Current(); CHECK_NE(thread_id, kInvalidThreadId); VLOG(threads) << "SuspendThreadByThreadId starting"; + Thread* thread; + WrappedSuspend1Barrier wrapped_barrier{}; while (true) { { - // Note: this will transition to runnable and potentially suspend. We ensure only one thread - // is requesting another suspend, to avoid deadlock, by requiring this function be called - // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather - // than request thread suspension, to avoid potential cycles in threads requesting each other - // suspend. + // Note: this will transition to runnable and potentially suspend. ScopedObjectAccess soa(self); MutexLock thread_list_mu(self, *Locks::thread_list_lock_); - Thread* thread = nullptr; - for (const auto& it : list_) { - if (it->GetThreadId() == thread_id) { - thread = it; - break; - } - } + thread = FindThreadByThreadId(thread_id); if (thread == nullptr) { - CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread - << " no longer in thread list"; // There's a race in inflating a lock and the owner giving up ownership and then dying. ThreadSuspendByThreadIdWarning(::android::base::WARNING, "No such thread id for suspend", thread_id); return nullptr; } - VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread; DCHECK(Contains(thread)); + CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger"; + VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread; { MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_); - if (suspended_thread == nullptr) { - if (self->GetSuspendCount() > 0) { - // We hold the suspend count lock but another thread is trying to suspend us. Its not - // safe to try to suspend another thread in case we get a cycle. Start the loop again - // which will allow this thread to be suspended. - continue; - } - bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason); - DCHECK(updated); - suspended_thread = thread; - } else { - CHECK_EQ(suspended_thread, thread); - // If the caller isn't requesting suspension, a suspension should have already occurred. - CHECK_GT(thread->GetSuspendCount(), 0); - } - // IsSuspended on the current thread will fail as the current thread is changed into - // Runnable above. As the suspend count is now raised if this is the current thread - // it will self suspend on transition to Runnable, making it hard to work with. It's simpler - // to just explicitly handle the current thread in the callers to this code. - CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger"; - // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend - // count, or else we've waited and it has self suspended) or is the current thread, we're - // done. - if (thread->IsSuspended()) { - if (ATraceEnabled()) { - std::string name; - thread->GetThreadName(name); - ATraceBegin(StringPrintf("SuspendThreadByThreadId suspended %s id=%d", - name.c_str(), thread_id).c_str()); - } - VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread; - return thread; - } - const uint64_t total_delay = NanoTime() - start_time; - if (total_delay >= thread_suspend_timeout_ns_) { - ThreadSuspendByThreadIdWarning(::android::base::WARNING, - "Thread suspension timed out", - thread_id); - if (suspended_thread != nullptr) { - bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason); - DCHECK(updated); + if (LIKELY(self->GetSuspendCount() == 0 && thread->GetFlipFunction() == nullptr)) { + thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason); + if (thread->IsSuspended()) { + // See the discussion in mutator_gc_coord.md for the race here. + thread->RemoveFirstSuspend1Barrier(); + if (!thread->HasActiveSuspendBarrier()) { + thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier); + } + is_suspended = true; } - *timed_out = true; - return nullptr; - } else if (sleep_us == 0 && - total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { - // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent - // excessive CPU usage. - sleep_us = kThreadSuspendMaxYieldUs / 2; + DCHECK_GT(thread->GetSuspendCount(), 0); + break; } + // Else we either hold the suspend count lock but another thread is trying to suspend us, + // making it unsafe to try to suspend another thread in case we get a cycle. Or we're + // currently in the middle of a flip, and could otherwise encounter b/31683379. In either + // case, start the loop again, which will allow this thread to be suspended. } - // Release locks and come out of runnable state. } - VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend"; - ThreadSuspendSleep(sleep_us); - sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs); + // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry. + VLOG(threads) << "SuspendThreadByThreadId is sleeping"; + usleep(kThreadSuspendSleepUs); + } + // Now wait for target to decrement suspend barrier. + if (is_suspended || WaitForSuspendBarrier(&wrapped_barrier.barrier_)) { + // wrapped_barrier.barrier_ has been decremented and will no longer be accessed. + VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread; + if (ATraceEnabled()) { + std::string name; + thread->GetThreadName(name); + ATraceBegin(StringPrintf("SuspendThreadByPeer suspended %s for id=%d", + name.c_str(), thread_id).c_str()); + } + return thread; + } else { + // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe without + // additional cleanup. + ThreadSuspendByThreadIdWarning(::android::base::FATAL, + "SuspendThreadByThreadId timed out", + thread_id); + UNREACHABLE(); } } @@ -1161,8 +1079,7 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() { // daemons. CHECK(thread->IsDaemon()) << *thread; if (thread != self) { - bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->IncrementSuspendCount(self); ++daemons_left; } // We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be @@ -1262,11 +1179,10 @@ void ThreadList::Register(Thread* self) { // SuspendAll requests. MutexLock mu(self, *Locks::thread_list_lock_); MutexLock mu2(self, *Locks::thread_suspend_count_lock_); - // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While - // this isn't particularly efficient the suspend counts are most commonly 0 or 1. - for (int delta = suspend_all_count_; delta > 0; delta--) { - bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + if (suspend_all_count_ == 1) { + self->IncrementSuspendCount(self); + } else { + DCHECK_EQ(suspend_all_count_, 0); } CHECK(!Contains(self)); list_.push_back(self); @@ -1372,13 +1288,11 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) { MutexLock mu(self, *Locks::thread_list_lock_); MutexLock mu2(self, *Locks::thread_suspend_count_lock_); for (Thread* thread : list_) { - bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal); - DCHECK(suspended); + thread->IncrementSuspendCount(self); if (thread == self || thread->IsSuspended()) { threads_to_visit.push_back(thread); } else { - bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(resumed); + thread->DecrementSuspendCount(self); } } } @@ -1393,8 +1307,7 @@ void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) { { MutexLock mu2(self, *Locks::thread_suspend_count_lock_); for (Thread* thread : threads_to_visit) { - bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal); - DCHECK(updated); + thread->DecrementSuspendCount(self); } } } |