-rw-r--r--  compiler/jni/jni_compiler_test.cc                            4
-rw-r--r--  openjdkjvmti/ti_thread.cc                                    2
-rw-r--r--  runtime/entrypoints/quick/quick_jni_entrypoints.cc           2
-rw-r--r--  runtime/entrypoints/quick/quick_trampoline_entrypoints.cc    2
-rw-r--r--  runtime/mutator_gc_coord.md                                119
-rw-r--r--  runtime/thread-inl.h                                         22
-rw-r--r--  runtime/thread.cc                                            56
-rw-r--r--  runtime/thread.h                                              9
-rw-r--r--  runtime/thread_list.cc                                        6
-rw-r--r--  runtime/thread_list.h                                        11
10 files changed, 195 insertions, 38 deletions
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 35ee1edafd..1a408bb724 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -912,14 +912,14 @@ void JniCompilerTest::CompileAndRun_fooJJ_synchronizedImpl() {
gLogVerbosity.systrace_lock_logging = true;
InitEntryPoints(&self->tlsPtr_.jni_entrypoints,
&self->tlsPtr_.quick_entrypoints,
- self->ReadFlag(ThreadFlag::kMonitorJniEntryExit));
+ self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed));
result = env_->CallNonvirtualLongMethod(jobj_, jklass_, jmethod_, a, b);
EXPECT_EQ(a | b, result);
EXPECT_EQ(5, gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]);
gLogVerbosity.systrace_lock_logging = false;
InitEntryPoints(&self->tlsPtr_.jni_entrypoints,
&self->tlsPtr_.quick_entrypoints,
- self->ReadFlag(ThreadFlag::kMonitorJniEntryExit));
+ self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed));
gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni] = 0;
}
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 9c71d4e3cc..f89a9d9a14 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -550,7 +550,7 @@ static jint GetJavaStateFromInternal(const InternalThreadState& state) {
// Suspends the current thread if it has any suspend requests on it.
void ThreadUtil::SuspendCheck(art::Thread* self) {
- DCHECK(!self->ReadFlag(art::ThreadFlag::kSuspensionImmune));
+ DCHECK(!self->ReadFlag(art::ThreadFlag::kSuspensionImmune, std::memory_order_relaxed));
art::ScopedObjectAccess soa(self);
// Really this is only needed if we are in FastJNI and actually have the mutator_lock_ already.
self->FullSuspendCheck();
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 1359fef086..d4dc4714ba 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -158,7 +158,7 @@ extern uint64_t GenericJniMethodEnd(Thread* self,
// @CriticalNative does not do a state transition. @FastNative usually does not do a state
// transition either but it performs a suspend check that may do state transitions.
if (LIKELY(normal_native)) {
- if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit))) {
+ if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed))) {
artJniMonitoredMethodEnd(self);
} else {
artJniMethodEnd(self);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 6c817d5e16..90c0aea478 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2101,7 +2101,7 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self,
return nullptr; // Report error.
}
}
- if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit))) {
+ if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed))) {
artJniMonitoredMethodStart(self);
} else {
artJniMethodStart(self);
diff --git a/runtime/mutator_gc_coord.md b/runtime/mutator_gc_coord.md
index 01e3ef025d..35996a5f8e 100644
--- a/runtime/mutator_gc_coord.md
+++ b/runtime/mutator_gc_coord.md
@@ -308,6 +308,125 @@ to prevent a single thread suspension of a thread currently between
it will complete before it can be affected by suspension requests from other
threads.
+
+Thread state transitions, flags, and memory ordering
+----------------------------------------------------
+
+Logically, when a thread transitions to state `kRunnable`, it acquires the
+mutator lock (in shared mode). When it changes its state back to suspended, it
+releases that lock. These transitions must enforce proper happens-before
+ordering with respect to a thread acquiring the mutator lock in exclusive mode.
+Thus changing the thread state to suspended normally requires a release
+operation, to ensure visibility to a thread wishing to "suspend". Conversely,
+when we change the state back to `kRunnable` in
+`TransitionFromSuspendedToRunnable`, we do so with an acquire compare-exchange
+operation that also checks that no suspension request remains.
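+
+As a minimal sketch (hypothetical encoding and names, not the actual ART code,
+which packs state and flags differently and retries in a loop), the acquire
+compare-exchange when becoming runnable looks roughly like this:
+
+```c++
+#include <atomic>
+#include <cstdint>
+
+// Hypothetical encoding: low bits hold flags, high bits hold the thread state.
+constexpr uint32_t kSuspendRequestBit = 1u << 0;
+constexpr uint32_t kStateMask = 0xffu << 24;
+constexpr uint32_t kRunnableState = 0u;        // kRunnable encoded as zero.
+constexpr uint32_t kSuspendedState = 1u << 24;
+
+std::atomic<uint32_t> state_and_flags{kSuspendedState};
+
+bool TryTransitionToRunnable() {
+  uint32_t old_val = state_and_flags.load(std::memory_order_relaxed);
+  if ((old_val & kSuspendRequestBit) != 0) {
+    return false;  // A suspension request remains; the caller must wait instead.
+  }
+  uint32_t new_val = (old_val & ~kStateMask) | kRunnableState;
+  // Acquire on success pairs with the suspending thread's release operations,
+  // so e.g. the suspend-count decrement happens-before anything we do while
+  // runnable. If the flags changed concurrently, the CAS fails and the caller
+  // simply re-reads and retries.
+  return state_and_flags.compare_exchange_strong(
+      old_val, new_val, std::memory_order_acquire, std::memory_order_relaxed);
+}
+```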
+
+Any suspending thread should clear `kSuspendRequest` with a release
+operation. This, together with the acquire compare-exchange when returning to
+runnable state, ensures that suspend-count decrements happen-before a thread
+becomes runnable again.
+
+Flags are often set and tested with relaxed ordering, even when they may
+communicate a request to another thread. The reason this is safe varies by
+flag. Generally the request is confirmed by some other better-synchronized
+interaction, which establishes the necessary ordering (see the sketch after
+the following list). In particular:
+
+`kCheckpointRequest`
+: See below. The call happens-before checkpoint execution since
+`checkpoint_function` accesses are guarded by a lock, as are updates to the
+flag. Checkpoint completion ordering is ensured by waiting for both
+`ThreadList::RunCheckpoint()` to complete, and for a barrier indicating
+completion in "runnable" threads.
+
+`kEmptyCheckpointRequest`
+: Currently (12/2024) in flux. See below and b/382722942. We currently use
+acquire/release ordering to access this flag in some places to partially
+mitigate memory ordering issues here.
+
+`kActiveSuspendBarrier`
+: Changes are protected by `thread_suspend_count_lock_`, and
+`PassActiveSuspendBarriers` rechecks it while holding that lock.
+
+`kPendingFlipFunction`
+: Set while holding `thread_list_lock_`, but this lock is not consistently held
+while checking the flag, notably in `EnsureFlipFunctionStarted()`. We need
+acquire/release ordering to make sure that the requestor happens-before flip
+function execution, and the flip function is properly visible, among other
+things. We still commonly read the flag using a relaxed operation, but we
+confirm it with an acquire compare-exchange operation in
+`EnsureFlipFunctionStarted()` before acting on it.
+
+`kRunningFlipFunction`
+: Cleared with release ordering, and read with acquire ordering by
+`WaitForFlipFunction` and its callers, thus ensuring that flip function
+execution happens-before completion of `WaitForFlipFunction()`.
+
+`kSuspensionImmune`
+: Guarded by `thread_suspend_count_lock_`.
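+
+The "read relaxed, then confirm via a better-synchronized interaction" pattern
+above can be sketched as follows. The names below are hypothetical stand-ins;
+the real code is `Thread::PassActiveSuspendBarriers()`, which rechecks under
+`thread_suspend_count_lock_`:
+
+```c++
+#include <atomic>
+#include <cstdint>
+#include <mutex>
+
+// Hypothetical stand-ins for the thread's flag word and the suspend count lock.
+constexpr uint32_t kActiveSuspendBarrierBit = 1u << 3;
+std::atomic<uint32_t> flags{0};
+std::mutex suspend_count_lock;
+
+void PassBarriersIfRequested() {
+  // Cheap relaxed test. It may race with the requestor, but that is harmless:
+  // we never act on the flag without re-checking it under the lock.
+  if ((flags.load(std::memory_order_relaxed) & kActiveSuspendBarrierBit) == 0) {
+    return;
+  }
+  std::lock_guard<std::mutex> lock(suspend_count_lock);
+  if ((flags.load(std::memory_order_relaxed) & kActiveSuspendBarrierBit) != 0) {
+    // The lock orders us after the thread that set the flag and registered the
+    // barriers, so it is now safe to claim and clear the request.
+    flags.fetch_and(~kActiveSuspendBarrierBit, std::memory_order_relaxed);
+    // ... decrement the registered suspend barriers here ...
+  }
+}
+```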
+
+### Checkpoint-specific considerations
+
+Checkpoints expose additional tricky issues, since they are also used to ensure
+that certain global state changes have propagated to all running Java threads.
+
+`ThreadList::RunCheckpoint()` guarantees that "if at point _X_ `RunCheckpoint()`
+has returned, and all checkpoints have been properly observed to have completed,
+then every thread has executed a code sequence _S_ during which it remained in
+a suspended state, such that the call to `RunCheckpoint` happens-before the end
+of _S_, and the beginning of _S_ happens-before _X_."
+
+For each thread _T_, we attempt to atomically install the `kCheckpointRequest`
+flag while ensuring that _T_ is runnable. If this succeeds, then the facts that
+`tlsPtr_.checkpoint_function` is protected by `thread_suspend_count_lock_`,
+written by the requestor, and read by _T_ at a suspend point together ensure
+that the call happens-before _S_. Normally a barrier ensures that _S_
+happens-before _X_.
+
+If we are unable to do so for a thread _T_ because we find it suspended, we run
+`checkpoint_function` ourselves. Before running it we set `kSuspendRequest`
+(with a release store) while _T_ is in _S_, preventing _T_ from terminating _S_
+by becoming runnable: its acquire load of the flags when transitioning to
+runnable will see the request. This ensures that the `RunCheckpoint()` call
+happens-before the end of _S_. Since we learned of _T_'s suspended state via an
+acquire load of its state, which it stored with a release store, we know that
+the beginning of _S_ happens-before we return from `RunCheckpoint()`, and hence
+before _X_.
+
+The case of empty checkpoints is worth highlighting. We have traditionally
+optimized it by never suspending the affected threads. This appears correct if
+all operations are sequentially consistent. But once memory model issues are
+considered, doing it fully correctly appears more expensive than forgoing the
+optimization, as we explain below:
+
+We need to ensure that if Thread _A_ performs some update _U_ and then calls
+`RunEmptyCheckpoint()` then, when `RunEmptyCheckpoint()` returns, every Thread
+_B_ will have passed a suspend point at which _U_ became guaranteed visible to
+that thread. This could be ensured in different ways, depending on the observed
+state of Thread _B_:
+
+Runnable
+: Use acquire/release ordering when setting and detecting the
+`kEmptyCheckpointRequest` flag, and then use a barrier to observe when thread _B_
+has done so. In this case, Thread _B_ actually executes code at a suspend point.
+The acquire/release ordering on `kEmptyCheckpointRequest` ensures that _U_
+happens-before the suspend point, and the barrier ensures that the suspend point
+happens-before the `RunEmptyCheckpoint()` return.
+
+Suspended
+: In this case, we have no mechanism for the thread to execute synchronization
+operations, and things become trickier. Thread _B_ is suspended, but it may
+restart at any time. Thread _A_ wants to conclude that Thread _B_ is already
+suspended at a suspend point, and thus it is safe to ignore Thread _B_.
+Effectively, it wants to perform the update _U_, read _B_'s state, and continue,
+while _B_ may change its state back to runnable, and then read the result of
+_U_. Both threads effectively perform a store (either _U_ or to the state of
+_B_) and then read the other value. With only acquire/release ordering, this can
+result in _A_ seeing the old suspended state, and _B_ failing to see the update
+_U_, which is an incorrect outcome.
+: The canonical fix for this kind of `store; load` reordering problem is to
+make all operations sequentially consistent. There does not appear to be an
+easy way to limit this overhead to just empty checkpoint execution, so it
+appears better to forgo this "optimization".
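+
+The problematic interleaving can be sketched with plain `std::atomic`
+stand-ins (hypothetical names, not ART code):
+
+```c++
+#include <atomic>
+
+std::atomic<bool> update_u_done{false};  // Thread A's update U.
+std::atomic<bool> b_is_runnable{false};  // Thread B's published state.
+
+// Thread A, inside a hypothetical RunEmptyCheckpoint():
+bool a_may_ignore_b() {
+  update_u_done.store(true, std::memory_order_release);
+  // Store-then-load: without sequential consistency this load may effectively
+  // be reordered before the store above.
+  return !b_is_runnable.load(std::memory_order_acquire);
+}
+
+// Thread B, becoming runnable again after its suspend point:
+bool b_sees_u() {
+  b_is_runnable.store(true, std::memory_order_release);
+  return update_u_done.load(std::memory_order_acquire);
+}
+
+// With only acquire/release ordering, a_may_ignore_b() == true and
+// b_sees_u() == false can both happen: A ignores B even though B has not yet
+// seen U. Making all four accesses memory_order_seq_cst excludes this outcome.
+```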
+
[^1]: In the most recent versions of ART, compiler-generated code loads through
the address at `tlsPtr_.suspend_trigger`. A thread suspension is requested
by setting this to null, triggering a `SIGSEGV`, causing that thread to
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 432453e311..431d66d7ff 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -83,7 +83,11 @@ inline void Thread::AllowThreadSuspension() {
inline void Thread::CheckSuspend(bool implicit) {
DCHECK_EQ(Thread::Current(), this);
while (true) {
- StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ // memory_order_relaxed should be OK, since RunCheckpointFunction shares a lock with the
+ // requestor, and FullSuspendCheck() re-checks later. But we currently need memory_order_acquire
+ // for the empty checkpoint path.
+ // TODO (b/382722942): Revisit after we fix RunEmptyCheckpoint().
+ StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_acquire);
if (LIKELY(!state_and_flags.IsAnyOfFlagsSet(SuspendOrCheckpointRequestFlags()))) {
break;
} else if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) {
@@ -110,7 +114,8 @@ inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mu
Thread* self = Thread::Current();
DCHECK_EQ(self, this);
for (;;) {
- if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) {
+ // TODO (b/382722942): Revisit memory ordering after we fix RunEmptyCheckpoint().
+ if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_acquire)) {
RunEmptyCheckpoint();
// Check we hold only an expected mutex when accessing weak ref.
if (kIsDebugBuild) {
@@ -135,7 +140,8 @@ inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mu
inline void Thread::CheckEmptyCheckpointFromMutex() {
DCHECK_EQ(Thread::Current(), this);
for (;;) {
- if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) {
+ // TODO (b/382722942): Revisit memory ordering after we fix RunEmptyCheckpoint().
+ if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_acquire)) {
RunEmptyCheckpoint();
} else {
break;
@@ -234,7 +240,11 @@ inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state) {
DCHECK_NE(new_state, ThreadState::kRunnable);
while (true) {
- StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
+ // memory_order_relaxed is OK for ordinary checkpoints, which enforce ordering via
+ // thread_suspend_count_lock_ . It is not currently OK for empty checkpoints.
+ // TODO (b/382722942): Consider changing back to memory_order_relaxed after fixing empty
+ // checkpoints.
+ StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire);
DCHECK_EQ(old_state_and_flags.GetState(), ThreadState::kRunnable);
if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest))) {
IncrementStatsCounter(&checkpoint_count_);
@@ -265,6 +275,8 @@ inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state
inline void Thread::CheckActiveSuspendBarriers() {
DCHECK_NE(GetState(), ThreadState::kRunnable);
while (true) {
+ // memory_order_relaxed is OK here, since PassActiveSuspendBarriers() rechecks with
+ // thread_suspend_count_lock_ .
StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
if (LIKELY(!state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) &&
!state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest) &&
@@ -540,7 +552,7 @@ inline void Thread::IncrementSuspendCount(Thread* self) {
}
inline void Thread::DecrementSuspendCount(Thread* self, bool for_user_code) {
- DCHECK(ReadFlag(ThreadFlag::kSuspendRequest));
+ DCHECK(ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed));
Locks::thread_suspend_count_lock_->AssertHeld(self);
if (UNLIKELY(tls32_.suspend_count <= 0)) {
UnsafeLogFatalForSuspendCount(self, this);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5bd97eb59a..a94dbd697d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1530,7 +1530,7 @@ bool Thread::PassActiveSuspendBarriers() {
std::vector<AtomicInteger*> pass_barriers{};
{
MutexLock mu(this, *Locks::thread_suspend_count_lock_);
- if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier)) {
+ if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier, std::memory_order_relaxed)) {
// Quick exit test: The barriers have already been claimed - this is possible as there may
// be a race to claim and it doesn't matter who wins. All of the callers of this function
// (except SuspendAllInternal) will first test the kActiveSuspendBarrier flag without the
@@ -1604,7 +1604,8 @@ void Thread::RunEmptyCheckpoint() {
// Note: Empty checkpoint does not access the thread's stack,
// so we do not need to check for the flip function.
DCHECK_EQ(Thread::Current(), this);
- AtomicClearFlag(ThreadFlag::kEmptyCheckpointRequest);
+ // See mutator_gc_coord.md and b/382722942 for memory ordering discussion.
+ AtomicClearFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_release);
Runtime::Current()->GetThreadList()->EmptyCheckpointBarrier()->Pass(this);
}
@@ -1626,7 +1627,7 @@ bool Thread::RequestCheckpoint(Closure* function) {
} else {
checkpoint_overflow_.push_back(function);
}
- DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest));
+ DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest, std::memory_order_relaxed));
TriggerSuspend();
return true;
}
@@ -1790,7 +1791,8 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState wait_st
// Since we're runnable, and kPendingFlipFunction is set with all threads suspended, it
// cannot be set again here. Thus kRunningFlipFunction is either already set after the
// EnsureFlipFunctionStarted call, or will not be set before we call Run().
- if (ReadFlag(ThreadFlag::kRunningFlipFunction)) {
+ // See mutator_gc_coord.md for a discussion of memory ordering for thread flags.
+ if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) {
WaitForFlipFunction(self);
}
function->Run(this);
@@ -1831,7 +1833,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self,
DCHECK(self == Current());
bool check_exited = (tef != nullptr);
// Check that the thread can't unexpectedly exit while we are running.
- DCHECK(self == target || check_exited || target->ReadFlag(ThreadFlag::kSuspendRequest) ||
+ DCHECK(self == target || check_exited ||
+ target->ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed) ||
Locks::thread_list_lock_->IsExclusiveHeld(self))
<< *target;
bool become_runnable;
@@ -1857,6 +1860,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self,
target->VerifyState();
if (old_state_and_flags.GetValue() == 0) {
become_runnable = false;
+ // memory_order_relaxed is OK here, since we re-check with memory_order_acquire below before
+ // acting on a pending flip function.
old_state_and_flags = target->GetStateAndFlags(std::memory_order_relaxed);
} else {
become_runnable = true;
@@ -1868,8 +1873,12 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self,
while (true) {
DCHECK(!check_exited || (Locks::thread_list_lock_->IsExclusiveHeld(self) && !tef->HasExited()));
if (!old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) {
+ // Re-read the kRunningFlipFunction flag with acquire ordering to ensure that if we claim
+ // the flip function has run, then its execution happened-before our return.
+ bool running_flip =
+ target->ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire);
maybe_release();
- set_finished(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction));
+ set_finished(!running_flip);
return false;
}
DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction));
@@ -1900,7 +1909,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self,
// Let caller retry.
return false;
}
- old_state_and_flags = target->GetStateAndFlags(std::memory_order_acquire);
+ // As above, relaxed is OK: we re-read with memory_order_acquire before acting on the flags.
+ old_state_and_flags = target->GetStateAndFlags(std::memory_order_relaxed);
}
// Unreachable.
}
@@ -1908,14 +1918,14 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self,
void Thread::RunFlipFunction(Thread* self) {
// This function is called either by the thread running `ThreadList::FlipThreadRoots()` or when
// a thread becomes runnable, after we've successfully set the kRunningFlipFunction ThreadFlag.
- DCHECK(ReadFlag(ThreadFlag::kRunningFlipFunction));
+ DCHECK(ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_relaxed));
Closure* flip_function = GetFlipFunction();
tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed);
DCHECK(flip_function != nullptr);
VerifyState();
flip_function->Run(this);
- DCHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction));
+ DCHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed));
VerifyState();
AtomicClearFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_release);
// From here on this thread may go away, and it is no longer safe to access.
@@ -1933,12 +1943,12 @@ void Thread::WaitForFlipFunction(Thread* self) const {
// Repeat the check after waiting to guard against spurious wakeups (and because
// we share the `thread_suspend_count_lock_` and `resume_cond_` with other code).
// Check that the thread can't unexpectedly exit while we are running.
- DCHECK(self == this || ReadFlag(ThreadFlag::kSuspendRequest) ||
+ DCHECK(self == this || ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed) ||
Locks::thread_list_lock_->IsExclusiveHeld(self));
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
while (true) {
- StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire);
- if (!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) {
+ // See mutator_gc_coord.md for a discussion of memory ordering for thread flags.
+ if (!ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) {
return;
}
// We sometimes hold mutator lock here. OK since the flip function must complete quickly.
@@ -1958,9 +1968,10 @@ void Thread::WaitForFlipFunctionTestingExited(Thread* self, ThreadExitFlag* tef)
// complicated locking dance.
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
while (true) {
- StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire);
+ // See mutator_gc_coord.md for a discussion of memory ordering for thread flags.
+ bool running_flip = ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire);
Locks::thread_list_lock_->Unlock(self); // So we can wait or return.
- if (!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) {
+ if (!running_flip) {
return;
}
resume_cond_->WaitHoldingLocks(self);
@@ -1976,7 +1987,7 @@ void Thread::WaitForFlipFunctionTestingExited(Thread* self, ThreadExitFlag* tef)
void Thread::FullSuspendCheck(bool implicit) {
ScopedTrace trace(__FUNCTION__);
- DCHECK(!ReadFlag(ThreadFlag::kSuspensionImmune));
+ DCHECK(!ReadFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed));
DCHECK(this == Thread::Current());
VLOG(threads) << this << " self-suspending";
// Make thread appear suspended to other threads, release mutator_lock_.
@@ -2707,9 +2718,9 @@ Thread::~Thread() {
tlsPtr_.jni_env = nullptr;
}
CHECK_NE(GetState(), ThreadState::kRunnable);
- CHECK(!ReadFlag(ThreadFlag::kCheckpointRequest));
- CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest));
- CHECK(!ReadFlag(ThreadFlag::kSuspensionImmune));
+ CHECK(!ReadFlag(ThreadFlag::kCheckpointRequest, std::memory_order_relaxed));
+ CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_relaxed));
+ CHECK(!ReadFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed));
CHECK(tlsPtr_.checkpoint_function == nullptr);
CHECK_EQ(checkpoint_overflow_.size(), 0u);
// A pending flip function request is OK. FlipThreadRoots will have been notified that we
@@ -4818,11 +4829,11 @@ mirror::Object* Thread::GetPeerFromOtherThread() {
// mutator lock in exclusive mode, and we should not have a pending flip function.
if (kIsDebugBuild && Locks::thread_list_lock_->IsExclusiveHeld(self)) {
Locks::mutator_lock_->AssertExclusiveHeld(self);
- CHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction));
+ CHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed));
}
// Ensure that opeer is not obsolete.
EnsureFlipFunctionStarted(self, this);
- if (ReadFlag(ThreadFlag::kRunningFlipFunction)) {
+ if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) {
// Does not release mutator lock. Hence no new flip requests can be issued.
WaitForFlipFunction(self);
}
@@ -4833,7 +4844,8 @@ mirror::Object* Thread::LockedGetPeerFromOtherThread(ThreadExitFlag* tef) {
DCHECK(tlsPtr_.jpeer == nullptr);
Thread* self = Thread::Current();
Locks::thread_list_lock_->AssertHeld(self);
- if (ReadFlag(ThreadFlag::kPendingFlipFunction)) {
+ // memory_order_relaxed is OK here, because we recheck it later with acquire order.
+ if (ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed)) {
// It is unsafe to call EnsureFlipFunctionStarted with thread_list_lock_. Thus we temporarily
// release it, taking care to handle the case in which "this" thread disapppears while we no
// longer hold it.
@@ -4844,7 +4856,7 @@ mirror::Object* Thread::LockedGetPeerFromOtherThread(ThreadExitFlag* tef) {
return nullptr;
}
}
- if (ReadFlag(ThreadFlag::kRunningFlipFunction)) {
+ if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) {
// Does not release mutator lock. Hence no new flip requests can be issued.
WaitForFlipFunction(self);
}
diff --git a/runtime/thread.h b/runtime/thread.h
index 8ce9854212..3ccf7c3c97 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -127,6 +127,7 @@ enum class ThreadFlag : uint32_t {
kSuspendRequest = 1u << 0,
// Request that the thread do some checkpoint work and then continue.
+ // Only modified while holding thread_suspend_count_lock_ .
kCheckpointRequest = 1u << 1,
// Request that the thread do empty checkpoint and then continue.
@@ -158,7 +159,7 @@ enum class ThreadFlag : uint32_t {
// in any case not check for such requests; other clients of SuspendAll might.
// Prevents a situation in which we are asked to suspend just before we suspend all
// other threads, and then notice the suspension request and suspend ourselves,
- // leading to deadlock. Guarded by suspend_count_lock_ .
+ // leading to deadlock. Guarded by thread_suspend_count_lock_ .
// Should not ever be set when we try to transition to kRunnable.
// TODO(b/296639267): Generalize use to prevent SuspendAll from blocking
// in-progress GC.
@@ -1451,8 +1452,10 @@ class EXPORT Thread {
// Undo the effect of the previous call. Again only invoked by the thread itself.
void AllowPreMonitorMutexes();
- bool ReadFlag(ThreadFlag flag) const {
- return GetStateAndFlags(std::memory_order_relaxed).IsFlagSet(flag);
+ // Read a flag with the given memory order. See mutator_gc_coord.md for memory ordering
+ // considerations.
+ bool ReadFlag(ThreadFlag flag, std::memory_order order) const {
+ return GetStateAndFlags(order).IsFlagSet(flag);
}
void AtomicSetFlag(ThreadFlag flag, std::memory_order order = std::memory_order_seq_cst) {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 3665b34cd5..6a8a7f566c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -557,7 +557,7 @@ void ThreadList::RunEmptyCheckpoint() {
std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
runnable_thread_ids.end();
if (is_in_runnable_thread_ids &&
- thread->ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) {
+ thread->ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_relaxed)) {
// Found a runnable thread that hasn't responded to the empty checkpoint request.
// Assume it's stuck and safe to dump its stack.
thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
@@ -877,8 +877,8 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
}
// SuspendAllInternal blocks if we are in the middle of a flip.
- DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction));
- DCHECK(!self->ReadFlag(ThreadFlag::kRunningFlipFunction));
+ DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed));
+ DCHECK(!self->ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_relaxed));
ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str());
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 1cf28989fb..4bf08dc9c1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -138,6 +138,13 @@ class ThreadList {
// threads must act on that. It is possible that on return there will be threads which have not,
// and will not, run the checkpoint_function, and neither have/will any of their ancestors.
//
+ // We guarantee that if a thread calls RunCheckpoint() then, at any point X at which
+ // RunCheckpoint() has returned and all checkpoints have been properly observed to have
+ // completed (usually via a barrier), every thread has executed a code sequence S during
+ // which it remained in a suspended state, such that the call to `RunCheckpoint` happens-before
+ // the end of S, and the beginning of S happens-before X. Thus after a RunCheckpoint() call,
+ // no preexisting thread can still be relying on global information it caches between suspend
+ // points.
+ //
// TODO: Is it possible to simplify mutator_lock handling here? Should this wait for completion?
EXPORT size_t RunCheckpoint(Closure* checkpoint_function,
Closure* callback = nullptr,
@@ -157,6 +164,10 @@ class ThreadList {
// in-flight mutator heap access (eg. a read barrier.) Runnable threads will respond by
// decrementing the empty checkpoint barrier count. This works even when the weak ref access is
// disabled. Only one concurrent use is currently supported.
+ // TODO(b/382722942): This is intended to guarantee a memory ordering property analogous to
+ // that of RunCheckpoint(). It over-optimizes by always avoiding thread suspension and hence
+ // does not in fact guarantee this. (See the discussion in `mutator_gc_coord.md`.) Fix this by
+ // implementing it in terms of RunCheckpoint() instead.
void RunEmptyCheckpoint()
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);