author Lokesh Gidra <lokeshgidra@google.com> 2023-07-21 09:59:22 +0000
committer Hans Boehm <hboehm@google.com> 2023-07-25 15:11:43 +0000
commit b2777b10fd9d5eb494488251b01ad6e24f870f75 (patch)
tree c8d8babb3e6e1453dd298292b159638f3f874166
parent 516d3191e9a3213fe5233f213953c4a8eeddad49 (diff)
Ensure flip function is executed before another thread accesses stack
Currently, it's possible that some thread running a checkpoint or Thread::GetPeerFromOtherThread accesses another thread's thread-local data structures while the flip function is pending. This would result in accessing from-space references.

Test: art/test/testrunner/testrunner.py --host
Bug: 263557041
Change-Id: Ib79fe7155f1e1345c72aa39f2a6eb742ed1265f1
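The pattern the patch applies in GetPeerFromOtherThread() and ThreadList::RunCheckpoint() is: first try to run the target thread's pending flip function on its behalf, then wait until nobody else is still running it, and only then touch the target thread's thread-local data. Below is a minimal, self-contained C++ sketch of that pattern, not ART code: FakeThread, the kPendingFlip/kRunningFlip bits, ReadPeer and the 5 ms sleep are invented stand-ins for Thread::StateAndFlags, ThreadFlag::kPendingFlipFunction/kRunningFlipFunction, GetPeerFromOtherThread and kSuspendTimeDuringFlip.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <thread>

class FakeThread {
 public:
  static constexpr uint32_t kPendingFlip = 1u << 0;
  static constexpr uint32_t kRunningFlip = 1u << 1;

  // Install a flip function; in ART this is done with all other threads suspended.
  void SetFlipFunction(std::function<void()> fn) {
    flip_function_ = std::move(fn);
    flags_.fetch_or(kPendingFlip, std::memory_order_release);
  }

  // Try to claim and run the pending flip function on this thread's behalf.
  // Returns true if the caller won the race and ran it.
  bool EnsureFlipFunctionStarted() {
    uint32_t old_flags = flags_.load(std::memory_order_relaxed);
    while (old_flags & kPendingFlip) {
      uint32_t new_flags = (old_flags & ~kPendingFlip) | kRunningFlip;
      // On CAS failure, compare_exchange_weak reloads old_flags and we retry.
      if (flags_.compare_exchange_weak(old_flags, new_flags, std::memory_order_acquire)) {
        flip_function_();  // Flip stale (from-space) references in thread-local data.
        flags_.fetch_and(~kRunningFlip, std::memory_order_release);
        return true;
      }
    }
    return false;  // Nothing pending, or another thread claimed it.
  }

  // Accessor used by *other* threads: make sure the flip has started, then
  // wait until nobody is still running it before reading thread-local state.
  int ReadPeer() {
    EnsureFlipFunctionStarted();
    while (flags_.load(std::memory_order_acquire) & (kPendingFlip | kRunningFlip)) {
      std::this_thread::sleep_for(std::chrono::microseconds(5000));  // ~kSuspendTimeDuringFlip
    }
    return peer_;
  }

  int peer_ = 0;  // Stand-in for tlsPtr_.opeer.

 private:
  std::atomic<uint32_t> flags_{0};
  std::function<void()> flip_function_;
};

int main() {
  FakeThread t;
  t.SetFlipFunction([&t] { t.peer_ = 42; });  // Pretend the flip forwards the peer reference.
  std::thread reader([&t] { (void)t.ReadPeer(); });
  reader.join();
  return 0;
}

In the actual change, ThreadList::RunCheckpoint() does not sleep when another thread is already running a target thread's flip function; it moves on and revisits that thread in a later pass, as the thread_list.cc hunk below shows.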
-rw-r--r--  runtime/gc/collector/concurrent_copying.cc  4
-rw-r--r--  runtime/stack.cc  4
-rw-r--r--  runtime/thread-inl.h  23
-rw-r--r--  runtime/thread.cc  80
-rw-r--r--  runtime/thread.h  33
-rw-r--r--  runtime/thread_list.cc  38
6 files changed, 98 insertions, 84 deletions
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 48c595f1ce..787e997ba0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -482,9 +482,7 @@ class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor
void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
// Note: self is not necessarily equal to thread since thread may be suspended.
Thread* self = Thread::Current();
- CHECK(thread == self ||
- thread->IsSuspended() ||
- thread->GetState() == ThreadState::kWaitingPerformingGc)
+ CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
<< thread->GetState() << " thread " << thread << " self " << self;
thread->SetIsGcMarkingAndUpdateEntrypoints(true);
if (use_tlab_ && thread->HasTlab()) {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d7d5851130..bf844b48e4 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -77,7 +77,7 @@ StackVisitor::StackVisitor(Thread* thread,
context_(context),
check_suspended_(check_suspended) {
if (check_suspended_) {
- DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread;
+ DCHECK(thread == Thread::Current() || thread->GetState() != ThreadState::kRunnable) << *thread;
}
}
@@ -801,7 +801,7 @@ uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks
template <StackVisitor::CountTransitions kCount>
void StackVisitor::WalkStack(bool include_transitions) {
if (check_suspended_) {
- DCHECK(thread_ == Thread::Current() || thread_->IsSuspended());
+ DCHECK(thread_ == Thread::Current() || thread_->GetState() != ThreadState::kRunnable);
}
CHECK_EQ(cur_depth_, 0U);
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index e4b3f6420c..ce50471815 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -347,25 +347,12 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
DCHECK_EQ(GetSuspendCount(), 0);
} else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) ||
UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kWaitingForFlipFunction))) {
- // The thread should be suspended while another thread is running the flip function.
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- LOG(FATAL) << "Transitioning to Runnable while another thread is running the flip function,"
- // Note: Keeping unused flags. If they are set, it points to memory corruption.
- << " flags=" << old_state_and_flags.WithState(ThreadState::kRunnable).GetValue()
- << " state=" << old_state_and_flags.GetState();
+ // It's possible that some thread runs this thread's flip-function in
+ // Thread::GetPeerFromOtherThread() even though it was runnable.
+ WaitForFlipFunction(this);
} else {
DCHECK(old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction));
- // CAS the value with a memory barrier.
- // Do not set `ThreadFlag::kRunningFlipFunction` as no other thread can run
- // the flip function for a thread that is not suspended.
- StateAndFlags new_state_and_flags = old_state_and_flags.WithState(ThreadState::kRunnable)
- .WithoutFlag(ThreadFlag::kPendingFlipFunction);
- if (LIKELY(tls32_.state_and_flags.CompareAndSetWeakAcquire(old_state_and_flags.GetValue(),
- new_state_and_flags.GetValue()))) {
- // Mark the acquisition of a share of the mutator lock.
- GetMutatorLock()->TransitionFromSuspendedToRunnable(this);
- // Run the flip function.
- RunFlipFunction(this, /*notify=*/ false);
+ if (EnsureFlipFunctionStarted(this, old_state_and_flags)) {
break;
}
}
@@ -438,7 +425,7 @@ inline void Thread::RevokeThreadLocalAllocationStack() {
if (kIsDebugBuild) {
// Note: self is not necessarily equal to this thread since thread may be suspended.
Thread* self = Thread::Current();
- DCHECK(this == self || IsSuspended() || GetState() == ThreadState::kWaitingPerformingGc)
+ DCHECK(this == self || GetState() != ThreadState::kRunnable)
<< GetState() << " thread " << this << " self " << self;
}
tlsPtr_.thread_local_alloc_stack_end = nullptr;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7d0847c640..4c9d5efe42 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -144,6 +144,9 @@ thread_local Thread* Thread::self_tls_ = nullptr;
#endif
static constexpr bool kVerifyImageObjectsMarked = kIsDebugBuild;
+// Amount of time (in microseconds) that we sleep if another thread is running
+// the flip function of the thread that we are interested in.
+static constexpr size_t kSuspendTimeDuringFlip = 5'000;
// For implicit overflow checks we reserve an extra piece of memory at the bottom
// of the stack (lowest memory). The higher portion of the memory
@@ -1605,25 +1608,8 @@ void Thread::ClearSuspendBarrier(AtomicInteger* target) {
}
void Thread::RunCheckpointFunction() {
- // If this thread is suspended and another thread is running the checkpoint on its behalf,
- // we may have a pending flip function that we need to run for the sake of those checkpoints
- // that need to walk the stack. We should not see the flip function flags when the thread
- // is running the checkpoint on its own.
- StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
- if (UNLIKELY(state_and_flags.IsAnyOfFlagsSet(FlipFunctionFlags()))) {
- DCHECK(IsSuspended());
- Thread* self = Thread::Current();
- DCHECK(self != this);
- if (state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
- EnsureFlipFunctionStarted(self);
- state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
- DCHECK(!state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction));
- }
- if (state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) {
- WaitForFlipFunction(self);
- }
- }
-
+ DCHECK_EQ(Thread::Current(), this);
+ CHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
// Grab the suspend_count lock, get the next checkpoint and update all the checkpoint fields. If
// there are no more checkpoints we will also clear the kCheckpointRequest flag.
Closure* checkpoint;
@@ -1811,7 +1797,7 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
!self->GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
EnsureFlipFunctionStarted(self);
while (GetStateAndFlags(std::memory_order_acquire).IsAnyOfFlagsSet(FlipFunctionFlags())) {
- sched_yield();
+ usleep(kSuspendTimeDuringFlip);
}
function->Run(this);
@@ -1823,12 +1809,8 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend
DCHECK_NE(GetState(), ThreadState::kRunnable);
bool updated = ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
DCHECK(updated);
- }
-
- {
// Imitate ResumeAll, the thread may be waiting on Thread::resume_cond_ since we raised its
// suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
Thread::resume_cond_->Broadcast(self);
}
@@ -1849,23 +1831,43 @@ void Thread::SetFlipFunction(Closure* function) {
AtomicSetFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_release);
}
-void Thread::EnsureFlipFunctionStarted(Thread* self) {
- while (true) {
- StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
- if (!old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
- return;
- }
+bool Thread::EnsureFlipFunctionStarted(Thread* self, StateAndFlags old_state_and_flags) {
+ bool become_runnable;
+ if (old_state_and_flags.GetValue() == 0) {
+ become_runnable = false;
+ old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ } else {
+ become_runnable = true;
+ DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest));
+ }
+
+ while (old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction));
StateAndFlags new_state_and_flags =
old_state_and_flags.WithFlag(ThreadFlag::kRunningFlipFunction)
.WithoutFlag(ThreadFlag::kPendingFlipFunction);
+ if (become_runnable) {
+ DCHECK_EQ(self, this);
+ DCHECK_NE(self->GetState(), ThreadState::kRunnable);
+ new_state_and_flags = new_state_and_flags.WithState(ThreadState::kRunnable);
+ }
if (tls32_.state_and_flags.CompareAndSetWeakAcquire(old_state_and_flags.GetValue(),
new_state_and_flags.GetValue())) {
+ if (become_runnable) {
+ GetMutatorLock()->TransitionFromSuspendedToRunnable(this);
+ }
+ art::Locks::mutator_lock_->AssertSharedHeld(self);
RunFlipFunction(self, /*notify=*/ true);
DCHECK(!GetStateAndFlags(std::memory_order_relaxed).IsAnyOfFlagsSet(FlipFunctionFlags()));
- return;
+ return true;
+ } else {
+ old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ if (become_runnable && old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
+ break;
+ }
}
}
+ return false;
}
void Thread::RunFlipFunction(Thread* self, bool notify) {
@@ -4712,16 +4714,14 @@ bool Thread::IsAotCompiler() {
return Runtime::Current()->IsAotCompiler();
}
-mirror::Object* Thread::GetPeerFromOtherThread() const {
+mirror::Object* Thread::GetPeerFromOtherThread() {
DCHECK(tlsPtr_.jpeer == nullptr);
- mirror::Object* peer = tlsPtr_.opeer;
- if (gUseReadBarrier && Current()->GetIsGcMarking()) {
- // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
- // may have not been flipped yet and peer may be a from-space (stale) ref. So explicitly
- // mark/forward it here.
- peer = art::ReadBarrier::Mark(peer);
- }
- return peer;
+ // Ensure that opeer is not obsolete.
+ EnsureFlipFunctionStarted(Thread::Current());
+ while (GetStateAndFlags(std::memory_order_acquire).IsAnyOfFlagsSet(FlipFunctionFlags())) {
+ usleep(kSuspendTimeDuringFlip);
+ }
+ return tlsPtr_.opeer;
}
void Thread::SetReadBarrierEntrypoints() {
diff --git a/runtime/thread.h b/runtime/thread.h
index 578f9899d4..08c803d064 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -382,15 +382,8 @@ class Thread {
// Set the flip function. This is done with all threads suspended, except for the calling thread.
void SetFlipFunction(Closure* function);
- // Ensure that thread flip function started running. If no other thread is executing
- // it, the calling thread shall run the flip function and then notify other threads
- // that have tried to do that concurrently. After this function returns, the
- // `ThreadFlag::kPendingFlipFunction` is cleared but another thread may still
- // run the flip function as indicated by the `ThreadFlag::kRunningFlipFunction`.
- void EnsureFlipFunctionStarted(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
-
// Wait for the flip function to complete if still running on another thread.
- void WaitForFlipFunction(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+ void WaitForFlipFunction(Thread* self);
gc::accounting::AtomicStack<mirror::Object>* GetThreadLocalMarkStack() {
CHECK(gUseReadBarrier);
@@ -523,10 +516,12 @@ class Thread {
CHECK(tlsPtr_.jpeer == nullptr);
return tlsPtr_.opeer;
}
- // GetPeer is not safe if called on another thread in the middle of the CC thread flip and
+ // GetPeer is not safe if called on another thread in the middle of the thread flip and
// the thread's stack may have not been flipped yet and peer may be a from-space (stale) ref.
- // This function will explicitly mark/forward it.
- mirror::Object* GetPeerFromOtherThread() const REQUIRES_SHARED(Locks::mutator_lock_);
+ // This function will force a flip for the other thread if necessary.
+ // Since we hold a shared mutator lock, a new flip function cannot be concurrently
+ // installed.
+ mirror::Object* GetPeerFromOtherThread() REQUIRES_SHARED(Locks::mutator_lock_);
bool HasPeer() const {
return tlsPtr_.jpeer != nullptr || tlsPtr_.opeer != nullptr;
@@ -1748,6 +1743,19 @@ class Thread {
// to do that concurrently.
void RunFlipFunction(Thread* self, bool notify) REQUIRES_SHARED(Locks::mutator_lock_);
+ // Ensure that thread flip function started running. If no other thread is executing
+ // it, the calling thread shall run the flip function and then notify other threads
+ // that have tried to do that concurrently. After this function returns, the
+ // `ThreadFlag::kPendingFlipFunction` is cleared but another thread may still
+ // run the flip function as indicated by the `ThreadFlag::kRunningFlipFunction`.
+ // A non-zero 'old_state_and_flags' indicates that the thread should logically
+ // acquire mutator lock if we win the race to run the flip function, if a
+ // suspend request is not already set. A zero 'old_state_and_flags' indicates
+ // we already hold the mutator lock.
+ // Returns true if this call ran the flip function.
+ bool EnsureFlipFunctionStarted(Thread* self, StateAndFlags old_state_and_flags = StateAndFlags(0))
+ TRY_ACQUIRE_SHARED(true, Locks::mutator_lock_);
+
static void ThreadExitCallback(void* arg);
// Maximum number of suspend barriers.
@@ -2174,8 +2182,7 @@ class Thread {
friend class QuickExceptionHandler; // For dumping the stack.
friend class ScopedThreadStateChange;
friend class StubTest; // For accessing entrypoints.
- friend class ThreadList; // For ~Thread and Destroy.
-
+ friend class ThreadList; // For ~Thread, Destroy and EnsureFlipFunctionStarted.
friend class EntrypointsOrderTest; // To test the order of tls entries.
friend class JniCompilerTest; // For intercepting JNI entrypoint calls.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index e66ec899e2..5c7132425c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -388,17 +388,39 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback
// Run the checkpoint on ourself while we wait for threads to suspend.
checkpoint_function->Run(self);
+ bool repeat = true;
// Run the checkpoint on the suspended threads.
- for (const auto& thread : suspended_count_modified_threads) {
- // We know for sure that the thread is suspended at this point.
- DCHECK(thread->IsSuspended());
- checkpoint_function->Run(thread);
- {
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
- bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
- DCHECK(updated);
+ while (repeat) {
+ repeat = false;
+ for (auto& thread : suspended_count_modified_threads) {
+ if (thread != nullptr) {
+ // We know for sure that the thread is suspended at this point.
+ DCHECK(thread->IsSuspended());
+ // Make sure there is no pending flip function before running checkpoint
+ // on behalf of thread.
+ thread->EnsureFlipFunctionStarted(self);
+ if (thread->GetStateAndFlags(std::memory_order_acquire)
+ .IsAnyOfFlagsSet(Thread::FlipFunctionFlags())) {
+ // There is another thread running the flip function for 'thread'.
+ // Instead of waiting for it to complete, move to the next thread.
+ repeat = true;
+ continue;
+ }
+ checkpoint_function->Run(thread);
+ {
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+ bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
+ DCHECK(updated);
+ }
+ // We are done with 'thread' so set it to nullptr so that next outer
+ // loop iteration, if any, skips 'thread'.
+ thread = nullptr;
+ }
}
}
+ DCHECK(std::all_of(suspended_count_modified_threads.cbegin(),
+ suspended_count_modified_threads.cend(),
+ [](Thread* thread) { return thread == nullptr; }));
{
// Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their