author    | 2023-07-21 09:59:22 +0000
committer | 2023-07-25 15:11:43 +0000
commit    | b2777b10fd9d5eb494488251b01ad6e24f870f75 (patch)
tree      | c8d8babb3e6e1453dd298292b159638f3f874166
parent    | 516d3191e9a3213fe5233f213953c4a8eeddad49 (diff)
Ensure flip function is executed before another thread accesses stack
Currently, it's possible that a thread running a checkpoint or
Thread::GetPeerFromOtherThread accesses another thread's thread-local
data structures while the flip function is pending. This would result in
accessing from-space references.
Test: art/test/testrunner/testrunner.py --host
Bug: 263557041
Change-Id: Ib79fe7155f1e1345c72aa39f2a6eb742ed1265f1
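To make the race and the fix easier to follow, here is a minimal, self-contained C++ sketch of the synchronization pattern the change relies on. It is an illustrative model only, not ART code: the `ModelThread` type and its members are hypothetical, while the flag names and the 5 ms poll interval mirror `kPendingFlipFunction`, `kRunningFlipFunction`, and `kSuspendTimeDuringFlip` from the diff below. The idea is that a thread wanting another thread's thread-local data first tries to run that thread's pending flip function itself, then waits until nobody is running it.

```cpp
#include <atomic>
#include <chrono>
#include <cstdint>
#include <thread>

// Simplified model of one thread's state-and-flags word (illustrative only).
enum : uint32_t {
  kPendingFlipFunction = 1u << 0,  // flip function installed but not yet started
  kRunningFlipFunction = 1u << 1,  // some thread is currently running it
};

struct ModelThread {
  std::atomic<uint32_t> flags{0};
  void* opeer = nullptr;  // stands in for the thread-local data being protected

  void RunFlipFunction() { /* update opeer and other thread-local references */ }

  // If the flip function is still pending, claim it: atomically replace
  // "pending" with "running", run the function, then clear "running".
  void EnsureFlipFunctionStarted() {
    uint32_t old = flags.load(std::memory_order_relaxed);
    while (old & kPendingFlipFunction) {
      uint32_t desired = (old & ~kPendingFlipFunction) | kRunningFlipFunction;
      if (flags.compare_exchange_weak(old, desired, std::memory_order_acquire)) {
        RunFlipFunction();
        flags.fetch_and(~kRunningFlipFunction, std::memory_order_release);
        return;
      }
    }
  }
};

// Pattern enforced by the change: before touching another thread's data,
// start its flip function if pending, then wait until no one is running it.
void* GetPeerFromOtherThread(ModelThread& target) {
  target.EnsureFlipFunctionStarted();
  while (target.flags.load(std::memory_order_acquire) &
         (kPendingFlipFunction | kRunningFlipFunction)) {
    // Corresponds to usleep(kSuspendTimeDuringFlip) in the diff below.
    std::this_thread::sleep_for(std::chrono::microseconds(5000));
  }
  return target.opeer;  // now guaranteed to be an up-to-date reference
}
```

In the diff itself, `Thread::GetPeerFromOtherThread()` and `Thread::RequestSynchronousCheckpoint()` follow exactly this shape: `EnsureFlipFunctionStarted()` followed by a `usleep(kSuspendTimeDuringFlip)` poll until the flip-function flags are clear.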
-rw-r--r-- | runtime/gc/collector/concurrent_copying.cc |  4
-rw-r--r-- | runtime/stack.cc                            |  4
-rw-r--r-- | runtime/thread-inl.h                        | 23
-rw-r--r-- | runtime/thread.cc                           | 80
-rw-r--r-- | runtime/thread.h                            | 33
-rw-r--r-- | runtime/thread_list.cc                      | 38
6 files changed, 98 insertions, 84 deletions
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 48c595f1ce..787e997ba0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -482,9 +482,7 @@ class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor
   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
-    CHECK(thread == self ||
-          thread->IsSuspended() ||
-          thread->GetState() == ThreadState::kWaitingPerformingGc)
+    CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
         << thread->GetState() << " thread " << thread << " self " << self;
     thread->SetIsGcMarkingAndUpdateEntrypoints(true);
     if (use_tlab_ && thread->HasTlab()) {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d7d5851130..bf844b48e4 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -77,7 +77,7 @@ StackVisitor::StackVisitor(Thread* thread,
       context_(context),
       check_suspended_(check_suspended) {
   if (check_suspended_) {
-    DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread;
+    DCHECK(thread == Thread::Current() || thread->GetState() != ThreadState::kRunnable) << *thread;
   }
 }
@@ -801,7 +801,7 @@ uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks
 template <StackVisitor::CountTransitions kCount>
 void StackVisitor::WalkStack(bool include_transitions) {
   if (check_suspended_) {
-    DCHECK(thread_ == Thread::Current() || thread_->IsSuspended());
+    DCHECK(thread_ == Thread::Current() || thread_->GetState() != ThreadState::kRunnable);
   }
   CHECK_EQ(cur_depth_, 0U);
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index e4b3f6420c..ce50471815 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -347,25 +347,12 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
       DCHECK_EQ(GetSuspendCount(), 0);
     } else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) ||
                UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kWaitingForFlipFunction))) {
-      // The thread should be suspended while another thread is running the flip function.
-      static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
-      LOG(FATAL) << "Transitioning to Runnable while another thread is running the flip function,"
-                 // Note: Keeping unused flags. If they are set, it points to memory corruption.
-                 << " flags=" << old_state_and_flags.WithState(ThreadState::kRunnable).GetValue()
-                 << " state=" << old_state_and_flags.GetState();
+      // It's possible that some thread runs this thread's flip-function in
+      // Thread::GetPeerFromOtherThread() even though it was runnable.
+      WaitForFlipFunction(this);
     } else {
       DCHECK(old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction));
-      // CAS the value with a memory barrier.
-      // Do not set `ThreadFlag::kRunningFlipFunction` as no other thread can run
-      // the flip function for a thread that is not suspended.
-      StateAndFlags new_state_and_flags = old_state_and_flags.WithState(ThreadState::kRunnable)
-                                              .WithoutFlag(ThreadFlag::kPendingFlipFunction);
-      if (LIKELY(tls32_.state_and_flags.CompareAndSetWeakAcquire(old_state_and_flags.GetValue(),
-                                                                 new_state_and_flags.GetValue()))) {
-        // Mark the acquisition of a share of the mutator lock.
-        GetMutatorLock()->TransitionFromSuspendedToRunnable(this);
-        // Run the flip function.
-        RunFlipFunction(this, /*notify=*/ false);
+      if (EnsureFlipFunctionStarted(this, old_state_and_flags)) {
         break;
       }
     }
   }
@@ -438,7 +425,7 @@ inline void Thread::RevokeThreadLocalAllocationStack() {
   if (kIsDebugBuild) {
     // Note: self is not necessarily equal to this thread since thread may be suspended.
     Thread* self = Thread::Current();
-    DCHECK(this == self || IsSuspended() || GetState() == ThreadState::kWaitingPerformingGc)
+    DCHECK(this == self || GetState() != ThreadState::kRunnable)
         << GetState() << " thread " << this << " self " << self;
   }
   tlsPtr_.thread_local_alloc_stack_end = nullptr;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7d0847c640..4c9d5efe42 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -144,6 +144,9 @@ thread_local Thread* Thread::self_tls_ = nullptr;
 #endif
 static constexpr bool kVerifyImageObjectsMarked = kIsDebugBuild;
+// Amount of time (in microseconds) that we sleep if another thread is running
+// flip function of the thread that we are interested in.
+static constexpr size_t kSuspendTimeDuringFlip = 5'000;
 // For implicit overflow checks we reserve an extra piece of memory at the bottom
 // of the stack (lowest memory). The higher portion of the memory
@@ -1605,25 +1608,8 @@ void Thread::ClearSuspendBarrier(AtomicInteger* target) {
 }
 void Thread::RunCheckpointFunction() {
-  // If this thread is suspended and another thread is running the checkpoint on its behalf,
-  // we may have a pending flip function that we need to run for the sake of those checkpoints
-  // that need to walk the stack. We should not see the flip function flags when the thread
-  // is running the checkpoint on its own.
-  StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
-  if (UNLIKELY(state_and_flags.IsAnyOfFlagsSet(FlipFunctionFlags()))) {
-    DCHECK(IsSuspended());
-    Thread* self = Thread::Current();
-    DCHECK(self != this);
-    if (state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
-      EnsureFlipFunctionStarted(self);
-      state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
-      DCHECK(!state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction));
-    }
-    if (state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) {
-      WaitForFlipFunction(self);
-    }
-  }
-
+  DCHECK_EQ(Thread::Current(), this);
+  CHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
   // Grab the suspend_count lock, get the next checkpoint and update all the checkpoint fields. If
   // there are no more checkpoints we will also clear the kCheckpointRequest flag.
   Closure* checkpoint;
@@ -1811,7 +1797,7 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
         !self->GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
     EnsureFlipFunctionStarted(self);
     while (GetStateAndFlags(std::memory_order_acquire).IsAnyOfFlagsSet(FlipFunctionFlags())) {
-      sched_yield();
+      usleep(kSuspendTimeDuringFlip);
     }
     function->Run(this);
@@ -1823,12 +1809,8 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
     DCHECK_NE(GetState(), ThreadState::kRunnable);
     bool updated = ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
     DCHECK(updated);
-  }
-
-  {
     // Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
     // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
-    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     Thread::resume_cond_->Broadcast(self);
   }
@@ -1849,23 +1831,43 @@ void Thread::SetFlipFunction(Closure* function) {
   AtomicSetFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_release);
 }
-void Thread::EnsureFlipFunctionStarted(Thread* self) {
-  while (true) {
-    StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
-    if (!old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
-      return;
-    }
+bool Thread::EnsureFlipFunctionStarted(Thread* self, StateAndFlags old_state_and_flags) {
+  bool become_runnable;
+  if (old_state_and_flags.GetValue() == 0) {
+    become_runnable = false;
+    old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+  } else {
+    become_runnable = true;
+    DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest));
+  }
+
+  while (old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
     DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction));
     StateAndFlags new_state_and_flags = old_state_and_flags.WithFlag(ThreadFlag::kRunningFlipFunction)
                                             .WithoutFlag(ThreadFlag::kPendingFlipFunction);
+    if (become_runnable) {
+      DCHECK_EQ(self, this);
+      DCHECK_NE(self->GetState(), ThreadState::kRunnable);
+      new_state_and_flags = new_state_and_flags.WithState(ThreadState::kRunnable);
+    }
     if (tls32_.state_and_flags.CompareAndSetWeakAcquire(old_state_and_flags.GetValue(),
                                                         new_state_and_flags.GetValue())) {
+      if (become_runnable) {
+        GetMutatorLock()->TransitionFromSuspendedToRunnable(this);
+      }
+      art::Locks::mutator_lock_->AssertSharedHeld(self);
       RunFlipFunction(self, /*notify=*/ true);
       DCHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
-      return;
+      return true;
+    } else {
+      old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+      if (become_runnable && old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
+        break;
+      }
     }
   }
+  return false;
 }
 void Thread::RunFlipFunction(Thread* self, bool notify) {
@@ -4712,16 +4714,14 @@ bool Thread::IsAotCompiler() {
   return Runtime::Current()->IsAotCompiler();
 }
-mirror::Object* Thread::GetPeerFromOtherThread() const {
+mirror::Object* Thread::GetPeerFromOtherThread() {
   DCHECK(tlsPtr_.jpeer == nullptr);
-  mirror::Object* peer = tlsPtr_.opeer;
-  if (gUseReadBarrier && Current()->GetIsGcMarking()) {
-    // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
-    // may have not been flipped yet and peer may be a from-space (stale) ref. So explicitly
-    // mark/forward it here.
-    peer = art::ReadBarrier::Mark(peer);
-  }
-  return peer;
+  // Ensure that opeer is not obsolete.
+  EnsureFlipFunctionStarted(Thread::Current());
+  while (GetStateAndFlags(std::memory_order_acquire).IsAnyOfFlagsSet(FlipFunctionFlags())) {
+    usleep(kSuspendTimeDuringFlip);
+  }
+  return tlsPtr_.opeer;
 }
 void Thread::SetReadBarrierEntrypoints() {
diff --git a/runtime/thread.h b/runtime/thread.h
index 578f9899d4..08c803d064 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -382,15 +382,8 @@ class Thread {
   // Set the flip function. This is done with all threads suspended, except for the calling thread.
   void SetFlipFunction(Closure* function);
-  // Ensure that thread flip function started running. If no other thread is executing
-  // it, the calling thread shall run the flip function and then notify other threads
-  // that have tried to do that concurrently. After this function returns, the
-  // `ThreadFlag::kPendingFlipFunction` is cleared but another thread may still
-  // run the flip function as indicated by the `ThreadFlag::kRunningFlipFunction`.
-  void EnsureFlipFunctionStarted(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Wait for the flip function to complete if still running on another thread.
-  void WaitForFlipFunction(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+  void WaitForFlipFunction(Thread* self);
   gc::accounting::AtomicStack<mirror::Object>* GetThreadLocalMarkStack() {
     CHECK(gUseReadBarrier);
@@ -523,10 +516,12 @@ class Thread {
     CHECK(tlsPtr_.jpeer == nullptr);
     return tlsPtr_.opeer;
   }
-  // GetPeer is not safe if called on another thread in the middle of the CC thread flip and
+  // GetPeer is not safe if called on another thread in the middle of the thread flip and
   // the thread's stack may have not been flipped yet and peer may be a from-space (stale) ref.
-  // This function will explicitly mark/forward it.
-  mirror::Object* GetPeerFromOtherThread() const REQUIRES_SHARED(Locks::mutator_lock_);
+  // This function will force a flip for the other thread if necessary.
+  // Since we hold a shared mutator lock, a new flip function cannot be concurrently
+  // installed
+  mirror::Object* GetPeerFromOtherThread() REQUIRES_SHARED(Locks::mutator_lock_);
   bool HasPeer() const {
     return tlsPtr_.jpeer != nullptr || tlsPtr_.opeer != nullptr;
@@ -1748,6 +1743,19 @@ class Thread {
   // to do that concurrently.
   void RunFlipFunction(Thread* self, bool notify) REQUIRES_SHARED(Locks::mutator_lock_);
+  // Ensure that thread flip function started running. If no other thread is executing
+  // it, the calling thread shall run the flip function and then notify other threads
+  // that have tried to do that concurrently. After this function returns, the
+  // `ThreadFlag::kPendingFlipFunction` is cleared but another thread may still
+  // run the flip function as indicated by the `ThreadFlag::kRunningFlipFunction`.
+  // A non-zero 'old_state_and_flags' indicates that the thread should logically
+  // acquire mutator lock if we win the race to run the flip function, if a
+  // suspend request is not already set. A zero 'old_state_and_flags' indicates
+  // we already hold the mutator lock.
+  // Returns true if this call ran the flip function.
+  bool EnsureFlipFunctionStarted(Thread* self, StateAndFlags old_state_and_flags = StateAndFlags(0))
+      TRY_ACQUIRE_SHARED(true, Locks::mutator_lock_);
+
   static void ThreadExitCallback(void* arg);
   // Maximum number of suspend barriers.
@@ -2174,8 +2182,7 @@ class Thread {
   friend class QuickExceptionHandler;  // For dumping the stack.
   friend class ScopedThreadStateChange;
   friend class StubTest;  // For accessing entrypoints.
-  friend class ThreadList;  // For ~Thread and Destroy.
-
+  friend class ThreadList;  // For ~Thread, Destroy and EnsureFlipFunctionStarted.
   friend class EntrypointsOrderTest;  // To test the order of tls entries.
   friend class JniCompilerTest;  // For intercepting JNI entrypoint calls.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index e66ec899e2..5c7132425c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -388,17 +388,39 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
   // Run the checkpoint on ourself while we wait for threads to suspend.
   checkpoint_function->Run(self);
+  bool repeat = true;
   // Run the checkpoint on the suspended threads.
-  for (const auto& thread : suspended_count_modified_threads) {
-    // We know for sure that the thread is suspended at this point.
-    DCHECK(thread->IsSuspended());
-    checkpoint_function->Run(thread);
-    {
-      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
-      DCHECK(updated);
+  while (repeat) {
+    repeat = false;
+    for (auto& thread : suspended_count_modified_threads) {
+      if (thread != nullptr) {
+        // We know for sure that the thread is suspended at this point.
+        DCHECK(thread->IsSuspended());
+        // Make sure there is no pending flip function before running checkpoint
+        // on behalf of thread.
+        thread->EnsureFlipFunctionStarted(self);
+        if (thread->GetStateAndFlags(std::memory_order_acquire)
+                .IsAnyOfFlagsSet(Thread::FlipFunctionFlags())) {
+          // There is another thread running the flip function for 'thread'.
+          // Instead of waiting for it to complete, move to the next thread.
+          repeat = true;
+          continue;
+        }
+        checkpoint_function->Run(thread);
+        {
+          MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+          bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+          DCHECK(updated);
+        }
+        // We are done with 'thread' so set it to nullptr so that next outer
+        // loop iteration, if any, skips 'thread'.
+        thread = nullptr;
+      }
     }
   }
+  DCHECK(std::all_of(suspended_count_modified_threads.cbegin(),
+                     suspended_count_modified_threads.cend(),
+                     [](Thread* thread) { return thread == nullptr; }));
   {
     // Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their