From 0c3cc6350749a441fd54f8f3f67b7c69775000c8 Mon Sep 17 00:00:00 2001 From: Hans Boehm Date: Thu, 5 Aug 2021 18:30:08 -0700 Subject: Handle suspend requests in getReferent() When waiting in getReferent or the like, use a TimedWait, so we can occasionally check for suspend requests, thus avoiding deadlocks that can arise from blocking indefinitely in a runnable state. This is not particularly clean, and may introduce short delays when we would otherwise deadlock. It's also a bit risky because we are now releasing the mutator lock in code that previously didn't. This is a hopefully more correct replacement for aosp/1784003, which overlooked some of the complications here. This does not handle a similar problem in the JNI weak reference code. Each additional use context adds risk here, due to the mutator lock release. Bug: 195336624 Bug: 195664026 Test: Build and boot AOSP with much shorter timeouts. Test: Confirm that the timeout code is invoked. Change-Id: I0ffb2ffd105bed9dcb8664f92b17cfbcf756a6e0 --- runtime/gc/reference_processor.cc | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'runtime/gc/reference_processor.cc') diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index e34d140db4..ea0ea4b829 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -147,15 +147,12 @@ ObjPtr ReferenceProcessor::GetReferent(Thread* self, } } } - // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the - // presence of threads blocking for weak ref access. - self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_); if (!started_trace) { ATraceBegin("GetReferent blocked"); started_trace = true; start_millis = MilliTime(); } - condition_.WaitHoldingLocks(self); + WaitUntilDoneProcessingReferences(self); } if (started_trace) { finish_trace(start_millis); @@ -380,13 +377,34 @@ void ReferenceProcessor::ClearReferent(ObjPtr ref) { } void ReferenceProcessor::WaitUntilDoneProcessingReferences(Thread* self) { - // Wait until we are done processing reference. + // Wait until we are done processing references. + // TODO: We must hold reference_processor_lock_ to wait, and we cannot release and reacquire + // the mutator lock while we hold it. But we shouldn't remain runnable while we're asleep. + // Is there a way to do this more cleanly if we release the mutator lock in the condvar + // implementation? Without such a fix, we still need to be careful that we only very rarely + // need checkpoint or suspend requests to be serviced while we're waiting here; waiting for + // a timeout is better than a deadlock, but not cheap. See b/195664026 . + bool warned = false; while ((!kUseReadBarrier && SlowPathEnabled()) || (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) { // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the // presence of threads blocking for weak ref access. self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_); - condition_.WaitHoldingLocks(self); + if (condition_.TimedWaitHoldingLocks(self, /*ms=*/ 10, /*nsec=*/ 0)) { + // Timed out. + // We should rarely get here. If we do, temporarily release reference_processor_lock_ and + // mutator lock, so we can respond to checkpoint and suspend requests. + Locks::reference_processor_lock_->ExclusiveUnlock(self); + { + ScopedThreadSuspension sts(self, ThreadState::kSuspended); + if (!warned) { + LOG(WARNING) << "Long wait for reference processor."; + warned = true; + } + usleep(100); + } + Locks::reference_processor_lock_->ExclusiveLock(self); + } } } -- cgit v1.2.3-59-g8ed1b