Handle suspend requests in getReferent()
When waiting in getReferent() or the like, use a TimedWait so that we
can periodically check for suspend requests, thus avoiding deadlocks
that can arise from blocking indefinitely in a runnable state. A
simplified sketch of the resulting wait pattern appears below.
This is not particularly clean, and may introduce short delays
when we would otherwise deadlock. It's also a bit risky because
we are now releasing the mutator lock in code that previously didn't.
This is a hopefully more correct replacement for aosp/1784003, which
overlooked some of the complications here.
This does not handle a similar problem in the JNI weak reference code.
Each additional context in which we release the mutator lock adds
further risk.
Bug: 195336624
Bug: 195664026
Test: Build and boot AOSP with much shorter timeouts.
Test: Confirm that the timeout code is invoked.
Change-Id: I0ffb2ffd105bed9dcb8664f92b17cfbcf756a6e0
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6574ec0..a5fb40d 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1105,10 +1105,14 @@
}
bool ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) {
+ guard_.CheckSafeToWait(self);
+ return TimedWaitHoldingLocks(self, ms, ns);
+}
+
+bool ConditionVariable::TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) {
DCHECK(self == nullptr || self == Thread::Current());
bool timed_out = false;
guard_.AssertExclusiveHeld(self);
- guard_.CheckSafeToWait(self);
unsigned int old_recursion_count = guard_.recursion_count_;
#if ART_USE_FUTEXES
timespec rel_ts;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8f2a8ea..d4fb778 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -465,11 +465,13 @@
// TODO: No thread safety analysis on Wait and TimedWait as they call mutex operations via their
// pointer copy, thereby defeating annotalysis.
void Wait(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+ // Returns true on timeout.
bool TimedWait(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
// Variant of Wait that should be used with caution. Doesn't validate that no mutexes are held
// when waiting.
// TODO: remove this.
void WaitHoldingLocks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+ bool TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
void CheckSafeToWait(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
if (kDebugLocking) {
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index e34d140..ea0ea4b 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -147,15 +147,12 @@
}
}
}
- // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
- // presence of threads blocking for weak ref access.
- self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
if (!started_trace) {
ATraceBegin("GetReferent blocked");
started_trace = true;
start_millis = MilliTime();
}
- condition_.WaitHoldingLocks(self);
+ WaitUntilDoneProcessingReferences(self);
}
if (started_trace) {
finish_trace(start_millis);
@@ -380,13 +377,34 @@
}
void ReferenceProcessor::WaitUntilDoneProcessingReferences(Thread* self) {
- // Wait until we are done processing reference.
+ // Wait until we are done processing references.
+ // TODO: We must hold reference_processor_lock_ to wait, and we cannot release and reacquire
+ // the mutator lock while we hold it. But we shouldn't remain runnable while we're asleep.
+ // Is there a way to do this more cleanly if we release the mutator lock in the condvar
+ // implementation? Without such a fix, we still need to be careful that we only very rarely
+ // need checkpoint or suspend requests to be serviced while we're waiting here; waiting for
+ // a timeout is better than a deadlock, but not cheap. See b/195664026 .
+ bool warned = false;
while ((!kUseReadBarrier && SlowPathEnabled()) ||
(kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
// Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
// presence of threads blocking for weak ref access.
self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
- condition_.WaitHoldingLocks(self);
+ if (condition_.TimedWaitHoldingLocks(self, /*ms=*/ 10, /*ns=*/ 0)) {
+ // Timed out.
+ // We should rarely get here. If we do, temporarily release reference_processor_lock_ and
+ // mutator lock, so we can respond to checkpoint and suspend requests.
+ Locks::reference_processor_lock_->ExclusiveUnlock(self);
+ {
+ ScopedThreadSuspension sts(self, ThreadState::kSuspended);
+ if (!warned) {
+ LOG(WARNING) << "Long wait for reference processor.";
+ warned = true;
+ }
+ usleep(100);
+ }
+ Locks::reference_processor_lock_->ExclusiveLock(self);
+ }
}
}
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 54de5cc..8ea7bb1 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -58,7 +58,7 @@
// GetReferent fast path as an optimization.
void EnableSlowPath() REQUIRES_SHARED(Locks::mutator_lock_);
void BroadcastForSlowPath(Thread* self);
- // Decode the referent, may block if references are being processed.
+ // Decode the referent, may block and allow suspension if references are being processed.
ObjPtr<mirror::Object> GetReferent(Thread* self, ObjPtr<mirror::Reference> reference)
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_);
// Collects the cleared references and returns a task, to be executed after FinishGC, that will
@@ -89,7 +89,7 @@
// referents.
void StartPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
void StopPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
- // Wait until reference processing is done.
+ // Wait until reference processing is done. May temporarily release both required locks.
void WaitUntilDoneProcessingReferences(Thread* self)
REQUIRES_SHARED(Locks::mutator_lock_)
REQUIRES(Locks::reference_processor_lock_);