diff options
-rw-r--r-- | compiler/jni/jni_compiler_test.cc | 4
-rw-r--r-- | openjdkjvmti/ti_thread.cc | 2
-rw-r--r-- | runtime/entrypoints/quick/quick_jni_entrypoints.cc | 2
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 2
-rw-r--r-- | runtime/mutator_gc_coord.md | 119
-rw-r--r-- | runtime/thread-inl.h | 22
-rw-r--r-- | runtime/thread.cc | 56
-rw-r--r-- | runtime/thread.h | 9
-rw-r--r-- | runtime/thread_list.cc | 6
-rw-r--r-- | runtime/thread_list.h | 11
10 files changed, 195 insertions, 38 deletions
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 35ee1edafd..1a408bb724 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -912,14 +912,14 @@ void JniCompilerTest::CompileAndRun_fooJJ_synchronizedImpl() { gLogVerbosity.systrace_lock_logging = true; InitEntryPoints(&self->tlsPtr_.jni_entrypoints, &self->tlsPtr_.quick_entrypoints, - self->ReadFlag(ThreadFlag::kMonitorJniEntryExit)); + self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed)); result = env_->CallNonvirtualLongMethod(jobj_, jklass_, jmethod_, a, b); EXPECT_EQ(a | b, result); EXPECT_EQ(5, gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]); gLogVerbosity.systrace_lock_logging = false; InitEntryPoints(&self->tlsPtr_.jni_entrypoints, &self->tlsPtr_.quick_entrypoints, - self->ReadFlag(ThreadFlag::kMonitorJniEntryExit)); + self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed)); gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni] = 0; } diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc index 9c71d4e3cc..f89a9d9a14 100644 --- a/openjdkjvmti/ti_thread.cc +++ b/openjdkjvmti/ti_thread.cc @@ -550,7 +550,7 @@ static jint GetJavaStateFromInternal(const InternalThreadState& state) { // Suspends the current thread if it has any suspend requests on it. void ThreadUtil::SuspendCheck(art::Thread* self) { - DCHECK(!self->ReadFlag(art::ThreadFlag::kSuspensionImmune)); + DCHECK(!self->ReadFlag(art::ThreadFlag::kSuspensionImmune, std::memory_order_relaxed)); art::ScopedObjectAccess soa(self); // Really this is only needed if we are in FastJNI and actually have the mutator_lock_ already. self->FullSuspendCheck(); diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 1359fef086..d4dc4714ba 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -158,7 +158,7 @@ extern uint64_t GenericJniMethodEnd(Thread* self, // @CriticalNative does not do a state transition. @FastNative usually does not do a state // transition either but it performs a suspend check that may do state transitions. if (LIKELY(normal_native)) { - if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit))) { + if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed))) { artJniMonitoredMethodEnd(self); } else { artJniMethodEnd(self); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 6c817d5e16..90c0aea478 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -2101,7 +2101,7 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, return nullptr; // Report error. } } - if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit))) { + if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit, std::memory_order_relaxed))) { artJniMonitoredMethodStart(self); } else { artJniMethodStart(self); diff --git a/runtime/mutator_gc_coord.md b/runtime/mutator_gc_coord.md index 01e3ef025d..35996a5f8e 100644 --- a/runtime/mutator_gc_coord.md +++ b/runtime/mutator_gc_coord.md @@ -308,6 +308,125 @@ to prevent a single thread suspension of a thread currently between it will complete before it can be affected by suspension requests from other threads. 
+
+Thread state transitions, flags, and memory ordering
+----------------------------------------------------
+
+Logically, when a thread transitions to state `kRunnable`, it acquires the mutator
+lock (in shared mode). When it changes its state back to suspended, it releases
+that lock. These must enforce proper happens-before ordering with respect to a
+thread acquiring the mutator lock in exclusive mode. Thus changing the thread
+state to suspended normally requires a release operation, to ensure visibility
+to a thread wishing to "suspend". Conversely, when we change the state back to
+`kRunnable` in `TransitionFromSuspendedToRunnable` we do so with an acquire
+compare-exchange operation to check that no suspension request remains.
+
+Any suspending thread should clear `kSuspendRequest` with a release
+operation. This, together with the acquire compare-exchange when returning to
+runnable state, ensures that suspend-count decrements happen-before a thread
+becomes runnable again.
+
+Flags are often set and tested with relaxed ordering, even when they may
+communicate a request to another thread. The reason this is safe varies.
+Generally the request is confirmed by some other better-synchronized
+interaction, which establishes the necessary ordering. In particular:
+
+`kCheckpointRequest`
+: See below. The call happens-before checkpoint execution since
+`checkpoint_function` accesses are guarded by a lock, as are updates to the
+flag. Checkpoint completion ordering is ensured by waiting for both
+`ThreadList::RunCheckpoint()` to complete, and for a barrier indicating
+completion in "runnable" threads.
+
+`kEmptyCheckpointRequest`
+: Currently (12/2024) in flux. See below and b/382722942. We currently use
+acquire/release ordering to access this flag in some places to partially
+mitigate memory ordering issues here.
+
+`kActiveSuspendBarrier`
+: Changes are protected by `thread_suspend_count_lock_`, and
+`PassActiveSuspendBarriers` rechecks it while holding that lock.
+
+`kPendingFlipFunction`
+: Set while holding `thread_list_lock_`, but this lock is not consistently held
+while checking the flag, notably in `EnsureFlipFunctionStarted()`. We need
+acquire/release ordering to make sure that the requestor happens-before flip
+function execution, and the flip function is properly visible, among other
+things. We still commonly read the flag using a relaxed operation, but we
+confirm it with an acquire compare-exchange operation in
+`EnsureFlipFunctionStarted()` before acting on it.
+
+`kRunningFlipFunction`
+: Cleared with release ordering, and read with acquire ordering by
+`WaitForFlipFunction` and its callers, thus ensuring that flip function
+execution happens-before completion of `WaitForFlipFunction`.
+
+`kSuspensionImmune`
+: Guarded by `thread_suspend_count_lock_`.
+
+### Checkpoint-specific considerations
+
+Checkpoints expose additional tricky issues, since they are also used to ensure
+that certain global state changes have propagated to all running Java threads.
+
+`ThreadList::RunCheckpoint()` guarantees that "if at point _X_ `RunCheckpoint()`
+has returned, and all checkpoints have been properly observed to have completed,
+then every thread has executed a code sequence _S_ during which it remained in
+a suspended state, such that the call to `RunCheckpoint` happens-before the end
+of _S_, and the beginning of _S_ happened-before _X_."
+
+For each thread _T_, we attempt to atomically install the `kCheckpointRequest`
+flag while ensuring that _T_ is runnable.
If this succeeds, the fact that +`tlsPtr_.checkpoint_function` is protected by `thread_suspend_count_lock_`, and +`checkpoint_function` is written by the requestor, and read by _T_ at a suspend +point, ensures that the call happens-before _S_. Normally a barrier ensures that +_S_ happens-before _X_ . + +If we are unable to do so for a thread _T_ because we find it suspended, we run +`checkpoint_function` ourselves. Before running it we set `kSuspendRequest` +(with a release store) while _T_ is in _S_, preventing _T_ from terminating _S_ +by becoming runnable, after an acquire load of the flags. This ensures that the +`RunCheckpoint()` call happens-before the end of _S_. Since we learned of _T_'s +suspended state via an acquire load of its state, which it stored with a release +store, we know that the beginning of _S_ happens-before we return from +`RunCheckpoint()`, and hence before _X_ . + +The case of empty checkpoints is worth highlighting. We have traditionally +optimized that by avoiding ever suspending the affected threads. This appears +correct if all operations are sequentially consistent. But once memory model +issues are considered, it appears more expensive to do fully correctly than it +is to forego the optimization, as we explain below: + +We need to ensure that if Thread _A_ performs some update _U_ and then calls +`RunEmptyCheckpoint()` then, when `RunEmptyCheckpoint()` returns, every Thread +_B_ will have passed a suspend point at which _U_ became guaranteed visible to +that thread. This could be ensured in different ways, depending on the observed +state of Thread _B_: + +Runnable +: Use acquire/release ordering when setting and detecting the +`kEmptyCheckpointRequest` flag, and then use a barrier to observe when thread _B_ +has done so. In this case, Thread _B_ actually executes code at a suspend point. +The acquire release ordering on `kEmptyCheckpointRequest` ensures that _U_ +happens-before the suspend point, and the barrier ensures that the suspend point +happens-before the `RunEmptyCheckpoint()` return. + +Suspended +: In this case, we have no mechanism for the thread to execute synchronization +operations, and things become trickier. Thread _B_ is suspended, but it may +restart at any time. Thread _A_ wants to conclude that Thread _B_ is already +suspended at a suspend point, and thus it is safe to ignore Thread _B_. +Effectively, it wants to perform the update _U_, read _B_'s state, and continue, +while _B_ may change its state back to runnable, and then read the result of +_U_. Both threads effectively perform a store (either _U_ or to the state of +_B_) and then read the other value. With only acquire/release ordering, this can +result in _A_ seeing the old suspended state, and _B_ failing to see the update +_U_, which is an incorrect outcome. +: The canonical fix for this kind of `store; load` reordering problem is to make +all operations sequentially consistent. There does not appear to be an easy way +to limit this overhead to just empty checkpoint execution. Thus it appears to be +better to forego this "optimization". + [^1]: In the most recent versions of ART, compiler-generated code loads through the address at `tlsPtr_.suspend_trigger`. 
A thread suspension is requested by setting this to null, triggering a `SIGSEGV`, causing that thread to diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 432453e311..431d66d7ff 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -83,7 +83,11 @@ inline void Thread::AllowThreadSuspension() { inline void Thread::CheckSuspend(bool implicit) { DCHECK_EQ(Thread::Current(), this); while (true) { - StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed); + // Memory_order_relaxed should be OK, since RunCheckpointFunction shares a lock with the + // requestor, and FullSuspendCheck() re-checks later. But we currently need memory_order_acquire + // for the empty checkpoint path. + // TODO (b/382722942): Revisit after we fix RunEmptyCheckpoint(). + StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_acquire); if (LIKELY(!state_and_flags.IsAnyOfFlagsSet(SuspendOrCheckpointRequestFlags()))) { break; } else if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) { @@ -110,7 +114,8 @@ inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mu Thread* self = Thread::Current(); DCHECK_EQ(self, this); for (;;) { - if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) { + // TODO (b/382722942): Revisit memory ordering after we fix RunEmptyCheckpoint(). + if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_acquire)) { RunEmptyCheckpoint(); // Check we hold only an expected mutex when accessing weak ref. if (kIsDebugBuild) { @@ -135,7 +140,8 @@ inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mu inline void Thread::CheckEmptyCheckpointFromMutex() { DCHECK_EQ(Thread::Current(), this); for (;;) { - if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) { + // TODO (b/382722942): Revisit memory ordering after we fix RunEmptyCheckpoint(). + if (ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_acquire)) { RunEmptyCheckpoint(); } else { break; @@ -234,7 +240,11 @@ inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const { inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state) { DCHECK_NE(new_state, ThreadState::kRunnable); while (true) { - StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed); + // memory_order_relaxed is OK for ordinary checkpoints, which enforce ordering via + // thread_suspend_count_lock_ . It is not currently OK for empty checkpoints. + // TODO (b/382722942): Consider changing back to memory_order_relaxed after fixing empty + // checkpoints. + StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire); DCHECK_EQ(old_state_and_flags.GetState(), ThreadState::kRunnable); if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest))) { IncrementStatsCounter(&checkpoint_count_); @@ -265,6 +275,8 @@ inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state inline void Thread::CheckActiveSuspendBarriers() { DCHECK_NE(GetState(), ThreadState::kRunnable); while (true) { + // memory_order_relaxed is OK here, since PassActiveSuspendBarriers() rechecks with + // thread_suspend_count_lock_ . 
StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed); if (LIKELY(!state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) && !state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest) && @@ -540,7 +552,7 @@ inline void Thread::IncrementSuspendCount(Thread* self) { } inline void Thread::DecrementSuspendCount(Thread* self, bool for_user_code) { - DCHECK(ReadFlag(ThreadFlag::kSuspendRequest)); + DCHECK(ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed)); Locks::thread_suspend_count_lock_->AssertHeld(self); if (UNLIKELY(tls32_.suspend_count <= 0)) { UnsafeLogFatalForSuspendCount(self, this); diff --git a/runtime/thread.cc b/runtime/thread.cc index 5bd97eb59a..a94dbd697d 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1530,7 +1530,7 @@ bool Thread::PassActiveSuspendBarriers() { std::vector<AtomicInteger*> pass_barriers{}; { MutexLock mu(this, *Locks::thread_suspend_count_lock_); - if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier)) { + if (!ReadFlag(ThreadFlag::kActiveSuspendBarrier, std::memory_order_relaxed)) { // Quick exit test: The barriers have already been claimed - this is possible as there may // be a race to claim and it doesn't matter who wins. All of the callers of this function // (except SuspendAllInternal) will first test the kActiveSuspendBarrier flag without the @@ -1604,7 +1604,8 @@ void Thread::RunEmptyCheckpoint() { // Note: Empty checkpoint does not access the thread's stack, // so we do not need to check for the flip function. DCHECK_EQ(Thread::Current(), this); - AtomicClearFlag(ThreadFlag::kEmptyCheckpointRequest); + // See mutator_gc_coord.md and b/382722942 for memory ordering discussion. + AtomicClearFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_release); Runtime::Current()->GetThreadList()->EmptyCheckpointBarrier()->Pass(this); } @@ -1626,7 +1627,7 @@ bool Thread::RequestCheckpoint(Closure* function) { } else { checkpoint_overflow_.push_back(function); } - DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest)); + DCHECK(ReadFlag(ThreadFlag::kCheckpointRequest, std::memory_order_relaxed)); TriggerSuspend(); return true; } @@ -1790,7 +1791,8 @@ bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState wait_st // Since we're runnable, and kPendingFlipFunction is set with all threads suspended, it // cannot be set again here. Thus kRunningFlipFunction is either already set after the // EnsureFlipFunctionStarted call, or will not be set before we call Run(). - if (ReadFlag(ThreadFlag::kRunningFlipFunction)) { + // See mutator_gc_coord.md for a discussion of memory ordering for thread flags. + if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) { WaitForFlipFunction(self); } function->Run(this); @@ -1831,7 +1833,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self, DCHECK(self == Current()); bool check_exited = (tef != nullptr); // Check that the thread can't unexpectedly exit while we are running. 
- DCHECK(self == target || check_exited || target->ReadFlag(ThreadFlag::kSuspendRequest) || + DCHECK(self == target || check_exited || + target->ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed) || Locks::thread_list_lock_->IsExclusiveHeld(self)) << *target; bool become_runnable; @@ -1857,6 +1860,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self, target->VerifyState(); if (old_state_and_flags.GetValue() == 0) { become_runnable = false; + // Memory_order_relaxed is OK here, since we re-check with memory_order_acquire below before + // acting on a pending flip function. old_state_and_flags = target->GetStateAndFlags(std::memory_order_relaxed); } else { become_runnable = true; @@ -1868,8 +1873,12 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self, while (true) { DCHECK(!check_exited || (Locks::thread_list_lock_->IsExclusiveHeld(self) && !tef->HasExited())); if (!old_state_and_flags.IsFlagSet(ThreadFlag::kPendingFlipFunction)) { + // Re-read kRunningFlipFunction flag with acquire ordering to ensure that if we claim + // flip function has run then its execution happened-before our return. + bool running_flip = + target->ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire); maybe_release(); - set_finished(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)); + set_finished(!running_flip); return false; } DCHECK(!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)); @@ -1900,7 +1909,8 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self, // Let caller retry. return false; } - old_state_and_flags = target->GetStateAndFlags(std::memory_order_acquire); + // Again, we re-read with memory_order_acquire before acting on the flags. + old_state_and_flags = target->GetStateAndFlags(std::memory_order_relaxed); } // Unreachable. } @@ -1908,14 +1918,14 @@ bool Thread::EnsureFlipFunctionStarted(Thread* self, void Thread::RunFlipFunction(Thread* self) { // This function is called either by the thread running `ThreadList::FlipThreadRoots()` or when // a thread becomes runnable, after we've successfully set the kRunningFlipFunction ThreadFlag. - DCHECK(ReadFlag(ThreadFlag::kRunningFlipFunction)); + DCHECK(ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_relaxed)); Closure* flip_function = GetFlipFunction(); tlsPtr_.flip_function.store(nullptr, std::memory_order_relaxed); DCHECK(flip_function != nullptr); VerifyState(); flip_function->Run(this); - DCHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction)); + DCHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed)); VerifyState(); AtomicClearFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_release); // From here on this thread may go away, and it is no longer safe to access. @@ -1933,12 +1943,12 @@ void Thread::WaitForFlipFunction(Thread* self) const { // Repeat the check after waiting to guard against spurious wakeups (and because // we share the `thread_suspend_count_lock_` and `resume_cond_` with other code). // Check that the thread can't unexpectedly exit while we are running. 
- DCHECK(self == this || ReadFlag(ThreadFlag::kSuspendRequest) || + DCHECK(self == this || ReadFlag(ThreadFlag::kSuspendRequest, std::memory_order_relaxed) || Locks::thread_list_lock_->IsExclusiveHeld(self)); MutexLock mu(self, *Locks::thread_suspend_count_lock_); while (true) { - StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire); - if (!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) { + // See mutator_gc_coord.md for a discussion of memory ordering for thread flags. + if (!ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) { return; } // We sometimes hold mutator lock here. OK since the flip function must complete quickly. @@ -1958,9 +1968,10 @@ void Thread::WaitForFlipFunctionTestingExited(Thread* self, ThreadExitFlag* tef) // complicated locking dance. MutexLock mu(self, *Locks::thread_suspend_count_lock_); while (true) { - StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_acquire); + // See mutator_gc_coord.md for a discussion of memory ordering for thread flags. + bool running_flip = ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire); Locks::thread_list_lock_->Unlock(self); // So we can wait or return. - if (!old_state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction)) { + if (!running_flip) { return; } resume_cond_->WaitHoldingLocks(self); @@ -1976,7 +1987,7 @@ void Thread::WaitForFlipFunctionTestingExited(Thread* self, ThreadExitFlag* tef) void Thread::FullSuspendCheck(bool implicit) { ScopedTrace trace(__FUNCTION__); - DCHECK(!ReadFlag(ThreadFlag::kSuspensionImmune)); + DCHECK(!ReadFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed)); DCHECK(this == Thread::Current()); VLOG(threads) << this << " self-suspending"; // Make thread appear suspended to other threads, release mutator_lock_. @@ -2707,9 +2718,9 @@ Thread::~Thread() { tlsPtr_.jni_env = nullptr; } CHECK_NE(GetState(), ThreadState::kRunnable); - CHECK(!ReadFlag(ThreadFlag::kCheckpointRequest)); - CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest)); - CHECK(!ReadFlag(ThreadFlag::kSuspensionImmune)); + CHECK(!ReadFlag(ThreadFlag::kCheckpointRequest, std::memory_order_relaxed)); + CHECK(!ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_relaxed)); + CHECK(!ReadFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed)); CHECK(tlsPtr_.checkpoint_function == nullptr); CHECK_EQ(checkpoint_overflow_.size(), 0u); // A pending flip function request is OK. FlipThreadRoots will have been notified that we @@ -4818,11 +4829,11 @@ mirror::Object* Thread::GetPeerFromOtherThread() { // mutator lock in exclusive mode, and we should not have a pending flip function. if (kIsDebugBuild && Locks::thread_list_lock_->IsExclusiveHeld(self)) { Locks::mutator_lock_->AssertExclusiveHeld(self); - CHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction)); + CHECK(!ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed)); } // Ensure that opeer is not obsolete. EnsureFlipFunctionStarted(self, this); - if (ReadFlag(ThreadFlag::kRunningFlipFunction)) { + if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) { // Does not release mutator lock. Hence no new flip requests can be issued. 
WaitForFlipFunction(self); } @@ -4833,7 +4844,8 @@ mirror::Object* Thread::LockedGetPeerFromOtherThread(ThreadExitFlag* tef) { DCHECK(tlsPtr_.jpeer == nullptr); Thread* self = Thread::Current(); Locks::thread_list_lock_->AssertHeld(self); - if (ReadFlag(ThreadFlag::kPendingFlipFunction)) { + // memory_order_relaxed is OK here, because we recheck it later with acquire order. + if (ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed)) { // It is unsafe to call EnsureFlipFunctionStarted with thread_list_lock_. Thus we temporarily // release it, taking care to handle the case in which "this" thread disapppears while we no // longer hold it. @@ -4844,7 +4856,7 @@ mirror::Object* Thread::LockedGetPeerFromOtherThread(ThreadExitFlag* tef) { return nullptr; } } - if (ReadFlag(ThreadFlag::kRunningFlipFunction)) { + if (ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_acquire)) { // Does not release mutator lock. Hence no new flip requests can be issued. WaitForFlipFunction(self); } diff --git a/runtime/thread.h b/runtime/thread.h index 8ce9854212..3ccf7c3c97 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -127,6 +127,7 @@ enum class ThreadFlag : uint32_t { kSuspendRequest = 1u << 0, // Request that the thread do some checkpoint work and then continue. + // Only modified while holding thread_suspend_count_lock_ . kCheckpointRequest = 1u << 1, // Request that the thread do empty checkpoint and then continue. @@ -158,7 +159,7 @@ enum class ThreadFlag : uint32_t { // in any case not check for such requests; other clients of SuspendAll might. // Prevents a situation in which we are asked to suspend just before we suspend all // other threads, and then notice the suspension request and suspend ourselves, - // leading to deadlock. Guarded by suspend_count_lock_ . + // leading to deadlock. Guarded by thread_suspend_count_lock_ . // Should not ever be set when we try to transition to kRunnable. // TODO(b/296639267): Generalize use to prevent SuspendAll from blocking // in-progress GC. @@ -1451,8 +1452,10 @@ class EXPORT Thread { // Undo the effect of the previous call. Again only invoked by the thread itself. void AllowPreMonitorMutexes(); - bool ReadFlag(ThreadFlag flag) const { - return GetStateAndFlags(std::memory_order_relaxed).IsFlagSet(flag); + // Read a flag with the given memory order. See mutator_gc_coord.md for memory ordering + // considerations. + bool ReadFlag(ThreadFlag flag, std::memory_order order) const { + return GetStateAndFlags(order).IsFlagSet(flag); } void AtomicSetFlag(ThreadFlag flag, std::memory_order order = std::memory_order_seq_cst) { diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 3665b34cd5..6a8a7f566c 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -557,7 +557,7 @@ void ThreadList::RunEmptyCheckpoint() { std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) != runnable_thread_ids.end(); if (is_in_runnable_thread_ids && - thread->ReadFlag(ThreadFlag::kEmptyCheckpointRequest)) { + thread->ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_relaxed)) { // Found a runnable thread that hasn't responded to the empty checkpoint request. // Assume it's stuck and safe to dump its stack. thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT), @@ -877,8 +877,8 @@ void ThreadList::SuspendAll(const char* cause, bool long_suspend) { } // SuspendAllInternal blocks if we are in the middle of a flip. 
- DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction)); - DCHECK(!self->ReadFlag(ThreadFlag::kRunningFlipFunction)); + DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed)); + DCHECK(!self->ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_relaxed)); ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str()); diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 1cf28989fb..4bf08dc9c1 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -138,6 +138,13 @@ class ThreadList { // threads must act on that. It is possible that on return there will be threads which have not, // and will not, run the checkpoint_function, and neither have/will any of their ancestors. // + // We guarantee that if a thread calls RunCheckpoint() then, if at point X RunCheckpoint() has + // returned, and all checkpoints have been properly observed to have completed (usually via a + // barrier), then every thread has executed a code sequence S during which it remained in a + // suspended state, such that the call to `RunCheckpoint` happens-before the end of S, and the + // beginning of S happened-before X. Thus after a RunCheckpoint() call, no preexisting + // thread can still be relying on global information it caches between suspend points. + // // TODO: Is it possible to simplify mutator_lock handling here? Should this wait for completion? EXPORT size_t RunCheckpoint(Closure* checkpoint_function, Closure* callback = nullptr, @@ -157,6 +164,10 @@ class ThreadList { // in-flight mutator heap access (eg. a read barrier.) Runnable threads will respond by // decrementing the empty checkpoint barrier count. This works even when the weak ref access is // disabled. Only one concurrent use is currently supported. + // TODO(b/382722942): This is intended to guarantee the analogous memory ordering property to + // RunCheckpoint(). It over-optimizes by always avoiding thread suspension and hence does not in + // fact guarantee this. (See the discussion in `mutator_gc_coord.md`.) Fix this by implementing + // this with RunCheckpoint() instead. void RunEmptyCheckpoint() REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
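
The ordering rules documented in `runtime/mutator_gc_coord.md` above (a release operation when a thread leaves the runnable state, an acquire compare-exchange when it returns to it, and relaxed flag reads that are confirmed by a better-synchronized re-check) can be illustrated outside ART. The following is a minimal, hypothetical C++ sketch, not ART code: the names `state_and_flags`, `kSuspendRequest`, `TransitionToRunnable`, `TransitionToSuspended`, and `ResumeTarget` are invented stand-ins loosely modeled on `Thread::StateAndFlags` and the transitions described in the document.

```cpp
#include <atomic>
#include <cstdint>
#include <iostream>
#include <thread>

namespace {

// Hypothetical packed state-and-flags word, loosely modeled on Thread::StateAndFlags.
constexpr uint32_t kStateRunnable  = 0u;       // state lives in the low byte
constexpr uint32_t kStateSuspended = 1u;
constexpr uint32_t kStateMask      = 0xffu;
constexpr uint32_t kSuspendRequest = 1u << 8;  // flag bits live above the state

std::atomic<uint32_t> state_and_flags{kStateSuspended};
int shared_data = 0;  // stands in for data the resuming thread wants to publish

// Mutator side: return to runnable with an acquire compare-exchange that also verifies no
// suspend request remains (cf. TransitionFromSuspendedToRunnable). The acquire pairs with the
// requester's release below, so shared_data is visible once the CAS succeeds.
void TransitionToRunnable() {
  uint32_t expected = kStateSuspended;  // suspended, no flags set
  while (!state_and_flags.compare_exchange_weak(
      expected, kStateRunnable, std::memory_order_acquire, std::memory_order_relaxed)) {
    expected = kStateSuspended;  // a flag is still set (or spurious failure): spin until clear
  }
}

// Mutator side: leave the runnable state with a release store so that writes made while
// runnable are visible to anyone who observes the suspended state with an acquire load.
// (A real runtime would use a CAS loop here so concurrent flag updates are not lost.)
void TransitionToSuspended() {
  uint32_t current = state_and_flags.load(std::memory_order_relaxed);
  state_and_flags.store((current & ~kStateMask) | kStateSuspended, std::memory_order_release);
}

// Requester side: publish data, then clear the request flag with a release RMW, so the
// publication happens-before the target's acquire CAS succeeds and it becomes runnable again.
void ResumeTarget() {
  shared_data = 42;
  state_and_flags.fetch_and(~kSuspendRequest, std::memory_order_release);
}

}  // namespace

int main() {
  // The target starts out suspended with a suspend request pending.
  state_and_flags.store(kStateSuspended | kSuspendRequest, std::memory_order_relaxed);

  std::thread requester(ResumeTarget);
  std::thread target([] {
    TransitionToRunnable();            // spins until kSuspendRequest is cleared
    std::cout << shared_data << '\n';  // guaranteed to print 42
    TransitionToSuspended();
  });
  requester.join();
  target.join();
  return 0;
}
```

The sketch omits everything a real runtime needs (checkpoints, flip functions, futex-style waiting instead of spinning), and it deliberately does not model the empty-checkpoint case: as the document argues, once the other thread is merely observed to be suspended rather than synchronized with, the resulting store-then-load pattern needs sequential consistency, or a fallback to `RunCheckpoint()`, rather than acquire/release alone.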