diff options
author | 2023-12-19 18:48:15 +0000 | |
---|---|---|
committer | 2023-12-19 20:32:27 +0000 | |
commit | 8bc6a58df7046b4d6f4b51eb274c7e60fea396ff (patch) | |
tree | ac6aa639279f5cf2048cb147a91cbea98c8088a4 /runtime/thread-inl.h | |
parent | ee7471ec0a7aba338c3ac90de0f2ef0be9a35fed (diff) |
Revert^17 "Thread suspension cleanup and deadlock fix"
This reverts commit c6371b52df0da31acc174a3526274417b7aac0a7.
Reason for revert: This seems to have two remaining issues:
1. The second DCHECK in WaitForFlipFunction is not completely guaranteed to hold, resulting in failures for 658-fp-read-barrier.
2. WaitForSuspendBarrier seems to time out occasionally, possibly spuriously so. We fail when the futex times out once. That's probably incompatible with the app freezer. We should retry a few times.
Change-Id: Ibd8909b31083fc29e6d4f1fcde003d08eb16fc0a
Diffstat (limited to 'runtime/thread-inl.h')
-rw-r--r-- | runtime/thread-inl.h | 237 |
1 file changed, 49 insertions, 188 deletions
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 3fd42a7d94..ce50471815 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -17,6 +17,8 @@ #ifndef ART_RUNTIME_THREAD_INL_H_ #define ART_RUNTIME_THREAD_INL_H_ +#include "thread.h" + #include "arch/instruction_set.h" #include "base/aborting.h" #include "base/casts.h" @@ -26,10 +28,8 @@ #include "jni/jni_env_ext.h" #include "managed_stack-inl.h" #include "obj_ptr-inl.h" -#include "runtime.h" +#include "suspend_reason.h" #include "thread-current-inl.h" -#include "thread.h" -#include "thread_list.h" #include "thread_pool.h" namespace art { @@ -81,15 +81,12 @@ inline void Thread::CheckSuspend(bool implicit) { break; } else if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) { RunCheckpointFunction(); - } else if (state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest) && - !state_and_flags.IsFlagSet(ThreadFlag::kSuspensionImmune)) { + } else if (state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) { FullSuspendCheck(implicit); implicit = false; // We do not need to `MadviseAwayAlternateSignalStack()` anymore. - } else if (state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest)) { - RunEmptyCheckpoint(); } else { - DCHECK(state_and_flags.IsFlagSet(ThreadFlag::kSuspensionImmune)); - break; + DCHECK(state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest)); + RunEmptyCheckpoint(); } } if (implicit) { @@ -109,10 +106,9 @@ inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mu if (kIsDebugBuild) { for (int i = kLockLevelCount - 1; i >= 0; --i) { BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i)); - if (held_mutex != nullptr && held_mutex != GetMutatorLock() && - held_mutex != cond_var_mutex && - held_mutex != cp_placeholder_mutex_.load(std::memory_order_relaxed)) { - // placeholder_mutex may still be nullptr. That's OK. 
+ if (held_mutex != nullptr && + held_mutex != GetMutatorLock() && + held_mutex != cond_var_mutex) { CHECK(Locks::IsExpectedOnWeakRefAccess(held_mutex)) << "Holding unexpected mutex " << held_mutex->GetName() << " when accessing weak ref"; @@ -253,8 +249,7 @@ inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state } } -inline void Thread::CheckActiveSuspendBarriers() { - DCHECK_NE(GetState(), ThreadState::kRunnable); +inline void Thread::PassActiveSuspendBarriers() { while (true) { StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed); if (LIKELY(!state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) && @@ -262,7 +257,7 @@ inline void Thread::CheckActiveSuspendBarriers() { !state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier))) { break; } else if (state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) { - PassActiveSuspendBarriers(); + PassActiveSuspendBarriers(this); } else { // Impossible LOG(FATAL) << "Fatal, thread transitioned into suspended without running the checkpoint"; @@ -270,20 +265,6 @@ inline void Thread::CheckActiveSuspendBarriers() { } } -inline void Thread::AddSuspend1Barrier(WrappedSuspend1Barrier* suspend1_barrier) { - suspend1_barrier->next_ = tlsPtr_.active_suspend1_barriers; - tlsPtr_.active_suspend1_barriers = suspend1_barrier; -} - -inline void Thread::RemoveFirstSuspend1Barrier() { - tlsPtr_.active_suspend1_barriers = tlsPtr_.active_suspend1_barriers->next_; -} - -inline bool Thread::HasActiveSuspendBarrier() { - return tlsPtr_.active_suspend1_barriers != nullptr || - tlsPtr_.active_suspendall_barrier != nullptr; -} - inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { // Note: JNI stubs inline a fast path of this method that transitions to suspended if // there are no flags set and then clears the `held_mutexes[kMutatorLock]` (this comes @@ -299,7 +280,7 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { // Mark the 
release of the share of the mutator lock. GetMutatorLock()->TransitionFromRunnableToSuspended(this); // Once suspended - check the active suspend barrier flag - CheckActiveSuspendBarriers(); + PassActiveSuspendBarriers(); } inline ThreadState Thread::TransitionFromSuspendedToRunnable() { @@ -309,7 +290,6 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { // inlined from the `GetMutatorLock()->TransitionFromSuspendedToRunnable(this)` below). // Therefore any code added here (other than debug build assertions) should be gated // on some flag being set, so that the JNI stub can take the slow path to get here. - DCHECK(this == Current()); StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed); ThreadState old_state = old_state_and_flags.GetState(); DCHECK_NE(old_state, ThreadState::kRunnable); @@ -331,7 +311,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { break; } } else if (old_state_and_flags.IsFlagSet(ThreadFlag::kActiveSuspendBarrier)) { - PassActiveSuspendBarriers(); + PassActiveSuspendBarriers(this); } else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) || old_state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest))) { // Checkpoint flags should not be set while in suspended state. @@ -353,6 +333,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { thread_to_pass = this; } MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_); + ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this); // Reload state and flags after locking the mutex. 
old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed); DCHECK_EQ(old_state, old_state_and_flags.GetState()); @@ -364,15 +345,14 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { DCHECK_EQ(old_state, old_state_and_flags.GetState()); } DCHECK_EQ(GetSuspendCount(), 0); - } else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction))) { - DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)); - // Do this before transitioning to runnable, both because we shouldn't wait in a runnable - // state, and so that the thread running the flip function can DCHECK we're not runnable. + } else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) || + UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kWaitingForFlipFunction))) { + // It's possible that some thread runs this thread's flip-function in + // Thread::GetPeerFromOtherThread() even though it was runnable. WaitForFlipFunction(this); - } else if (old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) { - // Logically acquire mutator lock in shared mode. 
- DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)); - if (EnsureFlipFunctionStarted(this, this, old_state_and_flags)) { + } else { + DCHECK(old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)); + if (EnsureFlipFunctionStarted(this, old_state_and_flags)) { break; } } @@ -380,7 +360,6 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed); DCHECK_EQ(old_state, old_state_and_flags.GetState()); } - DCHECK_EQ(this->GetState(), ThreadState::kRunnable); return static_cast<ThreadState>(old_state); } @@ -459,66 +438,35 @@ inline void Thread::PoisonObjectPointersIfDebug() { } } -inline void Thread::IncrementSuspendCount(Thread* self, - AtomicInteger* suspendall_barrier, - WrappedSuspend1Barrier* suspend1_barrier, - SuspendReason reason) { - if (kIsDebugBuild) { - Locks::thread_suspend_count_lock_->AssertHeld(self); - if (this != self) { - Locks::thread_list_lock_->AssertHeld(self); - } - } - if (UNLIKELY(reason == SuspendReason::kForUserCode)) { - Locks::user_code_suspension_lock_->AssertHeld(self); - } - - uint32_t flags = enum_cast<uint32_t>(ThreadFlag::kSuspendRequest); - if (suspendall_barrier != nullptr) { - DCHECK(suspend1_barrier == nullptr); - DCHECK(tlsPtr_.active_suspendall_barrier == nullptr); - tlsPtr_.active_suspendall_barrier = suspendall_barrier; - flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier); - } else if (suspend1_barrier != nullptr) { - AddSuspend1Barrier(suspend1_barrier); - flags |= enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier); - } - - ++tls32_.suspend_count; - if (reason == SuspendReason::kForUserCode) { - ++tls32_.user_code_suspend_count; - } - - // Two bits might be set simultaneously. 
- tls32_.state_and_flags.fetch_or(flags, std::memory_order_release); - TriggerSuspend(); -} - -inline void Thread::IncrementSuspendCount(Thread* self) { - IncrementSuspendCount(self, nullptr, nullptr, SuspendReason::kInternal); -} - -inline void Thread::DecrementSuspendCount(Thread* self, bool for_user_code) { - DCHECK(ReadFlag(ThreadFlag::kSuspendRequest)); - Locks::thread_suspend_count_lock_->AssertHeld(self); - if (UNLIKELY(tls32_.suspend_count <= 0)) { - UnsafeLogFatalForSuspendCount(self, this); - UNREACHABLE(); - } - if (for_user_code) { - Locks::user_code_suspension_lock_->AssertHeld(self); - if (UNLIKELY(tls32_.user_code_suspend_count <= 0)) { - LOG(ERROR) << "user_code_suspend_count incorrect"; - UnsafeLogFatalForSuspendCount(self, this); - UNREACHABLE(); +inline bool Thread::ModifySuspendCount(Thread* self, + int delta, + AtomicInteger* suspend_barrier, + SuspendReason reason) { + if (delta > 0 && + (((gUseUserfaultfd || gUseReadBarrier) && this != self) || suspend_barrier != nullptr)) { + // When delta > 0 (requesting a suspend), ModifySuspendCountInternal() may fail either if + // active_suspend_barriers is full or we are in the middle of a thread flip. Retry in a loop. + while (true) { + if (LIKELY(ModifySuspendCountInternal(self, delta, suspend_barrier, reason))) { + return true; + } else { + // Failure means the list of active_suspend_barriers is full or we are in the middle of a + // thread flip, we should release the thread_suspend_count_lock_ (to avoid deadlock) and + // wait till the target thread has executed or Thread::PassActiveSuspendBarriers() or the + // flip function. Note that we could not simply wait for the thread to change to a suspended + // state, because it might need to run checkpoint function before the state change or + // resumes from the resume_cond_, which also needs thread_suspend_count_lock_. 
+ // + // The list of active_suspend_barriers is very unlikely to be full since more than + // kMaxSuspendBarriers threads need to execute SuspendAllInternal() simultaneously, and + // target thread stays in kRunnable in the mean time. + Locks::thread_suspend_count_lock_->ExclusiveUnlock(self); + NanoSleep(100000); + Locks::thread_suspend_count_lock_->ExclusiveLock(self); + } } - --tls32_.user_code_suspend_count; - } - - --tls32_.suspend_count; - - if (tls32_.suspend_count == 0) { - AtomicClearFlag(ThreadFlag::kSuspendRequest, std::memory_order_release); + } else { + return ModifySuspendCountInternal(self, delta, suspend_barrier, reason); } } @@ -551,93 +499,6 @@ inline void Thread::ResetDefaultStackEnd() { tlsPtr_.stack_end = tlsPtr_.stack_begin + GetStackOverflowReservedBytes(kRuntimeISA); } -inline void Thread::NotifyOnThreadExit(ThreadExitFlag* tef) { - DCHECK_EQ(tef->exited_, false); - DCHECK(tlsPtr_.thread_exit_flags == nullptr || !tlsPtr_.thread_exit_flags->exited_); - tef->next_ = tlsPtr_.thread_exit_flags; - tlsPtr_.thread_exit_flags = tef; - if (tef->next_ != nullptr) { - DCHECK(!tef->next_->HasExited()); - tef->next_->prev_ = tef; - } - tef->prev_ = nullptr; -} - -inline void Thread::UnregisterThreadExitFlag(ThreadExitFlag* tef) { - if (tef->HasExited()) { - // List is no longer used; each client will deallocate its own ThreadExitFlag. - return; - } - DCHECK(IsRegistered(tef)); - // Remove tef from the list. 
- if (tef->next_ != nullptr) { - tef->next_->prev_ = tef->prev_; - } - if (tef->prev_ == nullptr) { - DCHECK_EQ(tlsPtr_.thread_exit_flags, tef); - tlsPtr_.thread_exit_flags = tef->next_; - } else { - DCHECK_NE(tlsPtr_.thread_exit_flags, tef); - tef->prev_->next_ = tef->next_; - } - DCHECK(tlsPtr_.thread_exit_flags == nullptr || tlsPtr_.thread_exit_flags->prev_ == nullptr); -} - -inline void Thread::DCheckUnregisteredEverywhere(ThreadExitFlag* first, ThreadExitFlag* last) { - if (!kIsDebugBuild) { - return; - } - Thread* self = Thread::Current(); - MutexLock mu(self, *Locks::thread_list_lock_); - Runtime::Current()->GetThreadList()->ForEach([&](Thread* t) REQUIRES(Locks::thread_list_lock_) { - for (ThreadExitFlag* tef = t->tlsPtr_.thread_exit_flags; tef != nullptr; tef = tef->next_) { - CHECK(tef < first || tef > last) - << "tef = " << std::hex << tef << " first = " << first << std::dec; - } - // Also perform a minimal consistency check on each list. - ThreadExitFlag* flags = t->tlsPtr_.thread_exit_flags; - CHECK(flags == nullptr || flags->prev_ == nullptr); - }); -} - -inline bool Thread::IsRegistered(ThreadExitFlag* query_tef) { - for (ThreadExitFlag* tef = tlsPtr_.thread_exit_flags; tef != nullptr; tef = tef->next_) { - if (tef == query_tef) { - return true; - } - } - return false; -} - -inline void Thread::DisallowPreMonitorMutexes() { - if (kIsDebugBuild) { - CHECK(this == Thread::Current()); - CHECK(GetHeldMutex(kMonitorLock) == nullptr); - // Pretend we hold a kMonitorLock level mutex to detect disallowed mutex - // acquisitions by checkpoint Run() methods. We don't normally register or thus check - // kMonitorLock level mutexes, but this is an exception. 
- Mutex* ph = cp_placeholder_mutex_.load(std::memory_order_acquire); - if (UNLIKELY(ph == nullptr)) { - Mutex* new_ph = new Mutex("checkpoint placeholder mutex", kMonitorLock); - if (LIKELY(cp_placeholder_mutex_.compare_exchange_strong(ph, new_ph))) { - ph = new_ph; - } else { - // ph now has the value set by another thread. - delete new_ph; - } - } - SetHeldMutex(kMonitorLock, ph); - } -} - -// Undo the effect of the previous call. Again only invoked by the thread itself. -inline void Thread::AllowPreMonitorMutexes() { - if (kIsDebugBuild) { - CHECK_EQ(GetHeldMutex(kMonitorLock), cp_placeholder_mutex_.load(std::memory_order_relaxed)); - SetHeldMutex(kMonitorLock, nullptr); - } -} - } // namespace art #endif // ART_RUNTIME_THREAD_INL_H_ |