path: root/runtime/thread.cc
author Hans Boehm <hboehm@google.com> 2022-09-10 00:08:42 +0000
committer Hans Boehm <hboehm@google.com> 2022-09-10 00:08:42 +0000
commit 7c8835df16147b9096dd4c9380ab4b5f700ea17d (patch)
tree ce0f48061b3ba7d5dc1da4d0dda78520f2629d4a /runtime/thread.cc
parent 7c39c86b17c91e651de1fcc0876fe5565e3f5204 (diff)
Revert "Thread suspension cleanup and deadlock fix"
This reverts commit 7c39c86b17c91e651de1fcc0876fe5565e3f5204.

Reason for revert: We're seeing a number of new, somewhat rare, timeouts
on multiple tests.

Change-Id: Ida9a4f80b64b6fedc16db176a8df9c2e985ef482
Diffstat (limited to 'runtime/thread.cc')
-rw-r--r-- runtime/thread.cc | 317
1 file changed, 205 insertions(+), 112 deletions(-)
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6de88e6973..d233e6f174 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1436,8 +1436,7 @@ uint64_t Thread::GetCpuMicroTime() const {
}
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
-void Thread::UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread)
- NO_THREAD_SAFETY_ANALYSIS {
+static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
LOG(ERROR) << *thread << " suspend count already zero.";
Locks::thread_suspend_count_lock_->Unlock(self);
if (!Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -1455,51 +1454,141 @@ void Thread::UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread)
std::ostringstream ss;
Runtime::Current()->GetThreadList()->Dump(ss);
LOG(FATAL) << ss.str();
- UNREACHABLE();
}
-bool Thread::PassActiveSuspendBarriers() {
- DCHECK_EQ(this, Thread::Current());
- // Grab the suspend_count lock and copy the current set of barriers. Then clear the list and the
- // flag. The IncrementSuspendCount function requires the lock so we prevent a race between setting
+bool Thread::ModifySuspendCountInternal(Thread* self,
+ int delta,
+ AtomicInteger* suspend_barrier,
+ SuspendReason reason) {
+ if (kIsDebugBuild) {
+ DCHECK(delta == -1 || delta == +1)
+ << reason << " " << delta << " " << this;
+ Locks::thread_suspend_count_lock_->AssertHeld(self);
+ if (this != self && !IsSuspended()) {
+ Locks::thread_list_lock_->AssertHeld(self);
+ }
+ }
+ // User code suspensions need to be checked more closely since they originate from code outside of
+ // the runtime's control.
+ if (UNLIKELY(reason == SuspendReason::kForUserCode)) {
+ Locks::user_code_suspension_lock_->AssertHeld(self);
+ if (UNLIKELY(delta + tls32_.user_code_suspend_count < 0)) {
+ LOG(ERROR) << "attempting to modify suspend count in an illegal way.";
+ return false;
+ }
+ }
+ if (UNLIKELY(delta < 0 && tls32_.suspend_count <= 0)) {
+ UnsafeLogFatalForSuspendCount(self, this);
+ return false;
+ }
+
+ if (gUseReadBarrier && delta > 0 && this != self && tlsPtr_.flip_function != nullptr) {
+ // Force retry of a suspend request if it's in the middle of a thread flip to avoid a
+ // deadlock. b/31683379.
+ return false;
+ }
+
+ uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest);
+ if (delta > 0 && suspend_barrier != nullptr) {
+ uint32_t available_barrier = kMaxSuspendBarriers;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ if (tlsPtr_.active_suspend_barriers[i] == nullptr) {
+ available_barrier = i;
+ break;
+ }
+ }
+ if (available_barrier == kMaxSuspendBarriers) {
+ // No barrier spaces available, we can't add another.
+ return false;
+ }
+ tlsPtr_.active_suspend_barriers[available_barrier] = suspend_barrier;
+ flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
+ }
+
+ tls32_.suspend_count += delta;
+ switch (reason) {
+ case SuspendReason::kForUserCode:
+ tls32_.user_code_suspend_count += delta;
+ break;
+ case SuspendReason::kInternal:
+ break;
+ }
+
+ if (tls32_.suspend_count == 0) {
+ AtomicClearFlag(ThreadFlag::kSuspendRequest);
+ } else {
+ // Two bits might be set simultaneously.
+ tls32_.state_and_flags.fetch_or(flags, std::memory_order_seq_cst);
+ TriggerSuspend();
+ }
+ return true;
+}
+
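
The interesting part of the restored ModifySuspendCountInternal is the fixed-size barrier table: a suspend request may carry a barrier that the target must decrement once it actually suspends, and there are only kMaxSuspendBarriers slots to park it in. Below is a minimal, hypothetical sketch of that registration step, assuming plain std::atomic in place of ART's AtomicInteger; SuspendState, kMaxBarriers, and RegisterBarrier are illustrative names, not ART declarations.

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kMaxBarriers = 3;

    struct SuspendState {
      int suspend_count = 0;
      std::atomic<int32_t>* barriers[kMaxBarriers] = {};
    };

    // Register a barrier in the first free slot and bump the suspend count.
    // Returns false when every slot is taken, mirroring the "no barrier
    // spaces available" path above that makes the caller back off and retry.
    bool RegisterBarrier(SuspendState& s, std::atomic<int32_t>* barrier) {
      for (uint32_t i = 0; i < kMaxBarriers; ++i) {
        if (s.barriers[i] == nullptr) {
          s.barriers[i] = barrier;
          ++s.suspend_count;
          return true;
        }
      }
      return false;
    }
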
+bool Thread::PassActiveSuspendBarriers(Thread* self) {
+ // Grab the suspend_count lock and copy the current set of
+ // barriers. Then clear the list and the flag. The ModifySuspendCount
+ // function requires the lock so we prevent a race between setting
// the kActiveSuspendBarrier flag and clearing it.
- std::vector<AtomicInteger*> pass_barriers{};
+ AtomicInteger* pass_barriers[kMaxSuspendBarriers];
{
- MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier)) {
- // quick exit test: the barriers have already been claimed - this is possible as there may
- // be a race to claim and it doesn't matter who wins.
- // All of the callers of this function (except the SuspendAllInternal) will first test the
- // kActiveSuspendBarrier flag without lock. Here double-check whether the barrier has been
- // passed with the suspend_count lock.
+ // quick exit test: the barriers have already been claimed - this is
+ // possible as there may be a race to claim and it doesn't matter
+ // who wins.
+ // All of the callers of this function (except the SuspendAllInternal)
+ // will first test the kActiveSuspendBarrier flag without lock. Here
+ // double-check whether the barrier has been passed with the
+ // suspend_count lock.
return false;
}
- if (tlsPtr_.active_suspendall_barrier != nullptr) {
- pass_barriers.push_back(tlsPtr_.active_suspendall_barrier);
- tlsPtr_.active_suspendall_barrier = nullptr;
- }
- for (WrappedSuspend1Barrier* w = tlsPtr_.active_suspend1_barriers; w != nullptr; w = w->next_) {
- pass_barriers.push_back(&(w->barrier_));
+
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ pass_barriers[i] = tlsPtr_.active_suspend_barriers[i];
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
}
- tlsPtr_.active_suspend1_barriers = nullptr;
AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
}
uint32_t barrier_count = 0;
- for (AtomicInteger* barrier : pass_barriers) {
- ++barrier_count;
- int32_t old_val = barrier->fetch_sub(1, std::memory_order_release);
- CHECK_GT(old_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << old_val;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; i++) {
+ AtomicInteger* pending_threads = pass_barriers[i];
+ if (pending_threads != nullptr) {
+ bool done = false;
+ do {
+ int32_t cur_val = pending_threads->load(std::memory_order_relaxed);
+ CHECK_GT(cur_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << cur_val;
+ // Reduce value by 1.
+ done = pending_threads->CompareAndSetWeakRelaxed(cur_val, cur_val - 1);
#if ART_USE_FUTEXES
- if (old_val == 1) {
- futex(barrier->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
- }
+ if (done && (cur_val - 1) == 0) { // Weak CAS may fail spuriously.
+ futex(pending_threads->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
+ }
#endif
+ } while (!done);
+ ++barrier_count;
+ }
}
CHECK_GT(barrier_count, 0U);
return true;
}
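
PassActiveSuspendBarriers decrements each claimed barrier with a weak CAS, which may fail spuriously, so the loop retries until the decrement lands; whichever thread brings a barrier to zero wakes the suspender. A standalone sketch of that pattern, using C++20 atomic notify where the real code issues a raw FUTEX_WAKE_PRIVATE:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    void PassOneBarrier(std::atomic<int32_t>& pending_threads) {
      int32_t cur = pending_threads.load(std::memory_order_relaxed);
      do {
        assert(cur > 0);  // Matches the CHECK_GT in the real code.
        // compare_exchange_weak refreshes 'cur' on failure, so each retry
        // decrements the freshly observed value.
      } while (!pending_threads.compare_exchange_weak(
          cur, cur - 1, std::memory_order_relaxed));
      if (cur - 1 == 0) {
        pending_threads.notify_all();  // Futex wake in the real code.
      }
    }
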
+void Thread::ClearSuspendBarrier(AtomicInteger* target) {
+ CHECK(ReadFlag(ThreadFlag::kActiveSuspendBarrier));
+ bool clear_flag = true;
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ AtomicInteger* ptr = tlsPtr_.active_suspend_barriers[i];
+ if (ptr == target) {
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
+ } else if (ptr != nullptr) {
+ clear_flag = false;
+ }
+ }
+ if (LIKELY(clear_flag)) {
+ AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
+ }
+}
+
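
ClearSuspendBarrier, reinstated above, scrubs one barrier out of the table and drops the kActiveSuspendBarrier flag only if no other slot is still occupied. The same scan, sketched with illustrative types rather than ART's:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kMaxBarriers = 3;

    // Returns true when no other barrier remains, i.e. when it is safe for
    // the caller to clear its "active suspend barrier" flag.
    bool ClearOneBarrier(std::atomic<int32_t>* (&slots)[kMaxBarriers],
                         std::atomic<int32_t>* target) {
      bool others_remain = false;
      for (uint32_t i = 0; i < kMaxBarriers; ++i) {
        if (slots[i] == target) {
          slots[i] = nullptr;
        } else if (slots[i] != nullptr) {
          others_remain = true;
        }
      }
      return !others_remain;
    }
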
void Thread::RunCheckpointFunction() {
// If this thread is suspended and another thread is running the checkpoint on its behalf,
// we may have a pending flip function that we need to run for the sake of those checkpoints
@@ -1551,25 +1640,28 @@ void Thread::RunEmptyCheckpoint() {
}
bool Thread::RequestCheckpoint(Closure* function) {
- StateAndFlags old_state_and_flags(0 /* unused */), new_state_and_flags(0 /* unused */);
- do {
- old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
- if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
- return false; // Fail, thread is suspended and so can't run a checkpoint.
- }
- new_state_and_flags = old_state_and_flags;
- new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
- } while (!tls32_.state_and_flags.CompareAndSetWeakSequentiallyConsistent(
- old_state_and_flags.GetValue(), new_state_and_flags.GetValue()));
- // Succeeded setting checkpoint flag, now insert the actual checkpoint.
- if (tlsPtr_.checkpoint_function == nullptr) {
- tlsPtr_.checkpoint_function = function;
- } else {
- checkpoint_overflow_.push_back(function);
+ StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ if (old_state_and_flags.GetState() != ThreadState::kRunnable) {
+ return false; // Fail, thread is suspended and so can't run a checkpoint.
}
- DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
- TriggerSuspend();
- return true;
+
+ // We must be runnable to request a checkpoint.
+ DCHECK_EQ(old_state_and_flags.GetState(), ThreadState::kRunnable);
+ StateAndFlags new_state_and_flags = old_state_and_flags;
+ new_state_and_flags.SetFlag(ThreadFlag::kCheckpointRequest);
+ bool success = tls32_.state_and_flags.CompareAndSetStrongSequentiallyConsistent(
+ old_state_and_flags.GetValue(), new_state_and_flags.GetValue());
+ if (success) {
+ // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+ if (tlsPtr_.checkpoint_function == nullptr) {
+ tlsPtr_.checkpoint_function = function;
+ } else {
+ checkpoint_overflow_.push_back(function);
+ }
+ CHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
+ TriggerSuspend();
+ }
+ return success;
}
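
The restored RequestCheckpoint makes exactly one strong-CAS attempt on the packed state-and-flags word: read it, bail out if the target is not runnable, then try once to set the checkpoint bit and report whether that worked so the caller can re-evaluate under the lock. A sketch of the protocol with an invented bit layout (ART's real encoding differs):

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kRunnable = 0;            // Illustrative state value.
    constexpr uint32_t kStateMask = 0xff;        // Illustrative field split.
    constexpr uint32_t kCheckpointRequest = 1u << 8;

    bool TryRequestCheckpoint(std::atomic<uint32_t>& state_and_flags) {
      uint32_t old_val = state_and_flags.load(std::memory_order_relaxed);
      if ((old_val & kStateMask) != kRunnable) {
        return false;  // Suspended threads cannot run a checkpoint.
      }
      uint32_t new_val = old_val | kCheckpointRequest;
      // Strong CAS: fails only if the word really changed, in which case
      // the caller re-checks the thread's state and retries.
      return state_and_flags.compare_exchange_strong(
          old_val, new_val, std::memory_order_seq_cst);
    }
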
bool Thread::RequestEmptyCheckpoint() {
@@ -1648,85 +1740,85 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
Thread* self_thread;
};
- Locks::thread_list_lock_->AssertExclusiveHeld(self);
- // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
- // suspend-count lock for too long.
- if (GetState() == ThreadState::kRunnable) {
- BarrierClosure barrier_closure(function);
- bool installed = false;
- {
- MutexLock mu(self, *Locks::thread_suspend_count_lock_);
- installed = RequestCheckpoint(&barrier_closure);
- }
- if (installed) {
- // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
- // reacquire it since we don't know if 'this' hasn't been deleted yet.
- Locks::thread_list_lock_->ExclusiveUnlock(self);
- ScopedThreadStateChange sts(self, suspend_state);
- barrier_closure.Wait(self, suspend_state);
- return true;
+ for (;;) {
+ Locks::thread_list_lock_->AssertExclusiveHeld(self);
+ // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
+ // suspend-count lock for too long.
+ if (GetState() == ThreadState::kRunnable) {
+ BarrierClosure barrier_closure(function);
+ bool installed = false;
+ {
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+ installed = RequestCheckpoint(&barrier_closure);
+ }
+ if (installed) {
+ // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
+ // reacquire it since we don't know if 'this' hasn't been deleted yet.
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
+ ScopedThreadStateChange sts(self, suspend_state);
+ barrier_closure.Wait(self, suspend_state);
+ return true;
+ }
+ // Fall-through.
}
- // No longer runnable. Fall-through.
- }
- // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
- // Note: IncrementSuspendCount also expects the thread_list_lock to be held in
- // certain situations.
- while (true) {
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
+ // Note: ModifySuspendCountInternal also expects the thread_list_lock to be held in
+ // certain situations.
+ {
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- if (GetFlipFunction() == nullptr) {
- IncrementSuspendCount(self);
- break;
+ if (!ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal)) {
+ // Just retry the loop.
+ sched_yield();
+ continue;
+ }
}
- DCHECK(gUseReadBarrier); // Flip functions are only used by CC collector.
- sched_yield();
- }
- {
- // Release for the wait. The suspension will keep the target thread from being unregistered
- // and deleted. Reacquire after so that we can call DecrementSuspendCount without racing
- // against ThreadList::Unregister.
- ScopedThreadListLockUnlock stllu(self);
{
- ScopedThreadStateChange sts(self, suspend_state);
- while (GetState() == ThreadState::kRunnable) {
- // The target thread became runnable again. Wait till the suspend triggered in
- // IncrementSuspendCount moves the target thread to suspended.
- sched_yield();
+ // Release for the wait. The suspension will keep us from being deleted. Reacquire after so
+ // that we can call ModifySuspendCount without racing against ThreadList::Unregister.
+ ScopedThreadListLockUnlock stllu(self);
+ {
+ ScopedThreadStateChange sts(self, suspend_state);
+ while (GetState() == ThreadState::kRunnable) {
+ // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
+ // moves us to suspended.
+ sched_yield();
+ }
}
- }
- function->Run(this);
- }
+ function->Run(this);
+ }
- {
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ {
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- // Target was in a suspended state with a nonzero suspend count, so it must stay suspended.
- DCHECK_NE(GetState(), ThreadState::kRunnable);
- DecrementSuspendCount(self);
- }
+ DCHECK_NE(GetState(), ThreadState::kRunnable);
+ bool updated = ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
+ }
- {
- // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
- // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- Thread::resume_cond_->Broadcast(self);
- }
+ {
+ // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
+ // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ Thread::resume_cond_->Broadcast(self);
+ }
- // Release the thread_list_lock_ to be consistent with the barrier-closure path.
- Locks::thread_list_lock_->ExclusiveUnlock(self);
+ // Release the thread_list_lock_ to be consistent with the barrier-closure path.
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
- return true; // We're done, break out of the loop.
+ return true; // We're done, break out of the loop.
+ }
}
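
RequestSynchronousCheckpoint's restored outer loop has three outcomes per iteration: install the closure on a runnable target, pin a non-runnable target suspended and run the closure on its behalf, or yield and retry when the suspend count could not be bumped (e.g. the target is mid thread-flip). Here is the control-flow skeleton with ART's locking elided; the three helpers are hypothetical stubs standing in for the lock-protected steps above.

    #include <sched.h>

    struct Target {};  // Stand-in for ART's Thread.

    // Hypothetical stand-ins for the three lock-protected steps.
    bool TryInstallCheckpoint(Target&) { return false; }  // kRunnable path.
    bool TryBumpSuspendCount(Target&) { return true; }    // ModifySuspendCount(+1).
    void RunInlineAndResume(Target&) {}                   // Run closure, then -1.

    void RunSynchronousCheckpoint(Target& t) {
      for (;;) {
        if (TryInstallCheckpoint(t)) {
          return;  // Barrier-closure path: the target runs the closure itself.
        }
        if (!TryBumpSuspendCount(t)) {
          sched_yield();  // Lost a race (e.g. mid thread-flip); retry.
          continue;
        }
        RunInlineAndResume(t);  // Target is pinned suspended; run it here.
        return;
      }
    }
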
void Thread::SetFlipFunction(Closure* function) {
// This is called with all threads suspended, except for the calling thread.
DCHECK(IsSuspended() || Thread::Current() == this);
DCHECK(function != nullptr);
- DCHECK(GetFlipFunction() == nullptr);
- tlsPtr_.flip_function.store(function, std::memory_order_relaxed);
+ DCHECK(tlsPtr_.flip_function == nullptr);
+ tlsPtr_.flip_function = function;
DCHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
AtomicSetFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_release);
}
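
The SetFlipFunction hunk reverts tlsPtr_.flip_function from an atomic back to a plain pointer; what makes that safe is the release-ordered store of kPendingFlipFunction, which publishes the plain write to whoever later observes the flag with acquire semantics. A sketch of that publication pairing (the consuming side here is an assumption for illustration, not shown in this hunk):

    #include <atomic>

    using Closure = void (*)();

    struct FlipState {
      Closure flip_function = nullptr;   // Plain field, as after the revert.
      std::atomic<bool> pending{false};  // Models kPendingFlipFunction.
    };

    void Publish(FlipState& s, Closure f) {
      s.flip_function = f;                               // Plain store...
      s.pending.store(true, std::memory_order_release);  // ...published here.
    }

    Closure Consume(FlipState& s) {
      if (!s.pending.exchange(false, std::memory_order_acquire)) {
        return nullptr;                  // Nothing pending.
      }
      return s.flip_function;            // Acquire pairs with the release.
    }
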
@@ -1758,8 +1850,8 @@ void Thread::RunFlipFunction(Thread* self, bool notify) {
// no other thread may run checkpoints on a thread that's actually Runnable.
DCHECK_EQ(notify, ReadFlag(ThreadFlag::kRunningFlipFunction));
- Closure* flip_function = GetFlipFunction();
- tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+ Closure* flip_function = tlsPtr_.flip_function;
+ tlsPtr_.flip_function = nullptr;
DCHECK(flip_function != nullptr);
flip_function->Run(this);
@@ -2354,9 +2446,10 @@ Thread::Thread(bool daemon)
tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
gc::allocator::RosAlloc::GetDedicatedFullRun());
tlsPtr_.checkpoint_function = nullptr;
- tlsPtr_.active_suspendall_barrier = nullptr;
- tlsPtr_.active_suspend1_barriers = nullptr;
- tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
+ for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
+ tlsPtr_.active_suspend_barriers[i] = nullptr;
+ }
+ tlsPtr_.flip_function = nullptr;
tlsPtr_.thread_local_mark_stack = nullptr;
tls32_.is_transitioning_to_runnable = false;
ResetTlab();
@@ -2504,7 +2597,7 @@ Thread::~Thread() {
CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest));
CHECK(tlsPtr_.checkpoint_function == nullptr);
CHECK_EQ(checkpoint_overflow_.size(), 0u);
- CHECK(GetFlipFunction() == nullptr);
+ CHECK(tlsPtr_.flip_function == nullptr);
CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
// Make sure we processed all deoptimization requests.