Handle suspend requests in getReferent()

When waiting in getReferent or the like, use a TimedWait, so we
can occasionally check for suspend requests, thus avoiding deadlocks
that can arise from blocking indefinitely in a runnable state.

This is not particularly clean, and may introduce short delays
when we would otherwise deadlock. It's also a bit risky because
we are now releasing the mutator lock in code that previously didn't.

This is a hopefully more correct replacement for aosp/1784003, which
overlooked some of the complications here.

This does not handle a similar problem in the JNI weak reference code.
Each additional use context adds risk here, due to the mutator
lock release.

Bug: 195336624
Bug: 195664026
Test: Build and boot AOSP with much shorter timeouts.
Test: Confirm that the timeout code is invoked.
Change-Id: I0ffb2ffd105bed9dcb8664f92b17cfbcf756a6e0
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6574ec0..a5fb40d 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1105,10 +1105,14 @@
 }
 
 bool ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) {
+  guard_.CheckSafeToWait(self);
+  return TimedWaitHoldingLocks(self, ms, ns);
+}
+
+bool ConditionVariable::TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) {
   DCHECK(self == nullptr || self == Thread::Current());
   bool timed_out = false;
   guard_.AssertExclusiveHeld(self);
-  guard_.CheckSafeToWait(self);
   unsigned int old_recursion_count = guard_.recursion_count_;
 #if ART_USE_FUTEXES
   timespec rel_ts;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8f2a8ea..d4fb778 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -465,11 +465,13 @@
   // TODO: No thread safety analysis on Wait and TimedWait as they call mutex operations via their
   //       pointer copy, thereby defeating annotalysis.
   void Wait(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  // Returns true on timeout.
   bool TimedWait(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
   // Variant of Wait that should be used with caution. Doesn't validate that no mutexes are held
   // when waiting.
   // TODO: remove this.
   void WaitHoldingLocks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  bool TimedWaitHoldingLocks(Thread* self, int64_t ms, int32_t ns) NO_THREAD_SAFETY_ANALYSIS;
 
   void CheckSafeToWait(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
     if (kDebugLocking) {
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index e34d140..ea0ea4b 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -147,15 +147,12 @@
         }
       }
     }
-    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
-    // presence of threads blocking for weak ref access.
-    self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
     if (!started_trace) {
       ATraceBegin("GetReferent blocked");
       started_trace = true;
       start_millis = MilliTime();
     }
-    condition_.WaitHoldingLocks(self);
+    WaitUntilDoneProcessingReferences(self);
   }
   if (started_trace) {
     finish_trace(start_millis);
@@ -380,13 +377,34 @@
 }
 
 void ReferenceProcessor::WaitUntilDoneProcessingReferences(Thread* self) {
-  // Wait until we are done processing reference.
+  // Wait until we are done processing references.
+  // TODO: We must hold reference_processor_lock_ to wait, and we cannot release and reacquire
+  // the mutator lock while we hold it. But we shouldn't remain runnable while we're asleep.
+  // Is there a way to do this more cleanly if we release the mutator lock in the condvar
+  // implementation? Without such a fix, we still need to be careful that we only very rarely
+  // need checkpoint or suspend requests to be serviced while we're waiting here; waiting for
+  // a timeout is better than a deadlock, but not cheap. See b/195664026 .
+  bool warned = false;
   while ((!kUseReadBarrier && SlowPathEnabled()) ||
          (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
     // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
     // presence of threads blocking for weak ref access.
     self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
-    condition_.WaitHoldingLocks(self);
+    if (condition_.TimedWaitHoldingLocks(self, /*ms=*/ 10, /*nsec=*/ 0)) {
+      // Timed out.
+      // We should rarely get here. If we do, temporarily release reference_processor_lock_ and
+      // mutator lock, so we can respond to checkpoint and suspend requests.
+      Locks::reference_processor_lock_->ExclusiveUnlock(self);
+      {
+        ScopedThreadSuspension sts(self, ThreadState::kSuspended);
+        if (!warned) {
+          LOG(WARNING) << "Long wait for reference processor.";
+          warned = true;
+        }
+        usleep(100);
+      }
+      Locks::reference_processor_lock_->ExclusiveLock(self);
+    }
   }
 }
 
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 54de5cc..8ea7bb1 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -58,7 +58,7 @@
   // GetReferent fast path as an optimization.
   void EnableSlowPath() REQUIRES_SHARED(Locks::mutator_lock_);
   void BroadcastForSlowPath(Thread* self);
-  // Decode the referent, may block if references are being processed.
+  // Decode the referent, may block and allow suspension if references are being processed.
   ObjPtr<mirror::Object> GetReferent(Thread* self, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_);
   // Collects the cleared references and returns a task, to be executed after FinishGC, that will
@@ -89,7 +89,7 @@
   // referents.
   void StartPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
   void StopPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_);
-  // Wait until reference processing is done.
+  // Wait until reference processing is done. May temporarily release both required locks.
   void WaitUntilDoneProcessingReferences(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::reference_processor_lock_);