author     2021-08-05 18:30:08 -0700
committer  2021-08-07 15:16:49 +0000
commit     0c3cc6350749a441fd54f8f3f67b7c69775000c8 (patch)
tree       b1269dfa3f20f2abee25a880df1ad36884ff3f56
parent     825e82972fe46fdb0419c42bd7df102df1989ff9 (diff)
Handle suspend requests in getReferent()
When waiting in getReferent or the like, use a TimedWait, so we
can occasionally check for suspend requests, thus avoiding deadlocks
that can arise from blocking indefinitely in a runnable state.
This is not particularly clean, and may introduce short delays
when we would otherwise deadlock. It's also a bit risky because
we are now releasing the mutator lock in code that previously didn't.
This is a hopefully more correct replacement for aosp/1784003, which
overlooked some of the complications here.
This does not handle a similar problem in the JNI weak reference code.
Each additional use context adds risk here, due to the mutator
lock release.
Bug: 195336624
Bug: 195664026
Test: Build and boot AOSP with much shorter timeouts.
Test: Confirm that the timeout code is invoked.
Change-Id: I0ffb2ffd105bed9dcb8664f92b17cfbcf756a6e0
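
For readers outside ART, here is a minimal standalone sketch of the timed-wait pattern the description above adopts, written against plain C++17 rather than ART's internal primitives. The class Waiter, its methods WaitUntilDone and SignalDone, and the service_requests callback are hypothetical names invented for illustration only; the real change uses ConditionVariable::TimedWaitHoldingLocks, reference_processor_lock_, and ScopedThreadSuspension as shown in the diff below.

    // Sketch: wait with a short timeout instead of blocking indefinitely, and on
    // each timeout drop the lock so external requests (checkpoints/suspension in
    // ART, simulated here by a callback) can be serviced.
    #include <chrono>
    #include <condition_variable>
    #include <functional>
    #include <mutex>

    class Waiter {
     public:
      // Blocks until SignalDone() has been called. On every 10 ms timeout, releases
      // the lock and invokes service_requests, standing in for ART's checkpoint and
      // suspend handling, then reacquires the lock and re-checks the condition.
      void WaitUntilDone(const std::function<void()>& service_requests) {
        std::unique_lock<std::mutex> lock(mu_);
        while (!done_) {
          if (cv_.wait_for(lock, std::chrono::milliseconds(10)) ==
              std::cv_status::timeout) {
            lock.unlock();       // Analogous to releasing reference_processor_lock_.
            service_requests();  // Analogous to briefly becoming suspendable.
            lock.lock();         // Reacquire before re-checking the condition.
          }
        }
      }

      void SignalDone() {
        {
          std::lock_guard<std::mutex> lock(mu_);
          done_ = true;
        }
        cv_.notify_all();
      }

     private:
      std::mutex mu_;
      std::condition_variable cv_;
      bool done_ = false;
    };

Because the while loop re-checks the condition after every wakeup, both spurious wakeups and timeouts are harmless; the timeout only bounds how long a suspend or checkpoint request can go unserviced, at the cost of occasional extra wakeups.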
-rw-r--r--  runtime/base/mutex.cc             |  6
-rw-r--r--  runtime/base/mutex.h              |  2
-rw-r--r--  runtime/gc/reference_processor.cc | 30
-rw-r--r--  runtime/gc/reference_processor.h  |  4
4 files changed, 33 insertions, 9 deletions
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6574ec0db6..a5fb40d622 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1105,10 +1105,14 @@ void ConditionVariable::WaitHoldingLocks(Thread* self) {
 }
 
 bool ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) {
+  guard_.CheckSafeToWait(self);
+  return TimedWaitHoldingLocks(self, ms, ns);
+}
+
+bool ConditionVariable::TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) {
   DCHECK(self == nullptr || self == Thread::Current());
   bool timed_out = false;
   guard_.AssertExclusiveHeld(self);
-  guard_.CheckSafeToWait(self);
   unsigned int old_recursion_count = guard_.recursion_count_;
 #if ART_USE_FUTEXES
   timespec rel_ts;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8f2a8eac39..d4fb7786ab 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -465,11 +465,13 @@ class ConditionVariable {
   // TODO: No thread safety analysis on Wait and TimedWait as they call mutex operations via their
   // pointer copy, thereby defeating annotalysis.
   void Wait(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  // Returns true on timeout.
   bool TimedWait(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
   // Variant of Wait that should be used with caution. Doesn't validate that no mutexes are held
   // when waiting.
   // TODO: remove this.
   void WaitHoldingLocks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  bool TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
 
   void CheckSafeToWait(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
     if (kDebugLocking) {
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index e34d140db4..ea0ea4b829 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -147,15 +147,12 @@ ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self,
         }
       }
     }
-    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
-    // presence of threads blocking for weak ref access.
-    self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
     if (!started_trace) {
       ATraceBegin("GetReferent blocked");
       started_trace = true;
       start_millis = MilliTime();
     }
-    condition_.WaitHoldingLocks(self);
+    WaitUntilDoneProcessingReferences(self);
   }
   if (started_trace) {
     finish_trace(start_millis);
@@ -380,13 +377,34 @@ void ReferenceProcessor::ClearReferent(ObjPtr<mirror::Reference> ref) {
 }
 
 void ReferenceProcessor::WaitUntilDoneProcessingReferences(Thread* self) {
-  // Wait until we are done processing reference.
+  // Wait until we are done processing references.
+  // TODO: We must hold reference_processor_lock_ to wait, and we cannot release and reacquire
+  // the mutator lock while we hold it. But we shouldn't remain runnable while we're asleep.
+  // Is there a way to do this more cleanly if we release the mutator lock in the condvar
+  // implementation? Without such a fix, we still need to be careful that we only very rarely
+  // need checkpoint or suspend requests to be serviced while we're waiting here; waiting for
+  // a timeout is better than a deadlock, but not cheap. See b/195664026 .
+  bool warned = false;
   while ((!kUseReadBarrier && SlowPathEnabled()) ||
          (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
     // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
     // presence of threads blocking for weak ref access.
     self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
-    condition_.WaitHoldingLocks(self);
+    if (condition_.TimedWaitHoldingLocks(self, /*ms=*/ 10, /*nsec=*/ 0)) {
+      // Timed out.
+      // We should rarely get here. If we do, temporarily release reference_processor_lock_ and
+      // mutator lock, so we can respond to checkpoint and suspend requests.
+      Locks::reference_processor_lock_->ExclusiveUnlock(self);
+      {
+        ScopedThreadSuspension sts(self, ThreadState::kSuspended);
+        if (!warned) {
+          LOG(WARNING) << "Long wait for reference processor.";
+          warned = true;
+        }
+        usleep(100);
+      }
+      Locks::reference_processor_lock_->ExclusiveLock(self);
+    }
   }
 }
 
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 54de5cc572..8ea7bb1297 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -58,7 +58,7 @@ class ReferenceProcessor {
   // GetReferent fast path as an optimization.
   void EnableSlowPath() REQUIRES_SHARED(Locks::mutator_lock_);
   void BroadcastForSlowPath(Thread* self);
-  // Decode the referent, may block if references are being processed.
+  // Decode the referent, may block and allow suspension if references are being processed.
   ObjPtr<mirror::Object> GetReferent(Thread* self, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_);
   // Collects the cleared references and returns a task, to be executed after FinishGC, that will
@@ -89,7 +89,7 @@ class ReferenceProcessor {
   // referents.
   void StartPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
   void StopPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
-  // Wait until reference processing is done.
+  // Wait until reference processing is done. May temporarily release both required locks.
   void WaitUntilDoneProcessingReferences(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::reference_processor_lock_);