Add more weak reference information to traces

Separate the marking piece of EnqueueFinalizerReferences.

Report the number of finalizable objects.

Similarly report the number of SoftReferences we encounter
and the amount of time we spend marking as a result.
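
The counted trace tag looks roughly like the following self-contained
sketch (TraceEnabled/TraceBegin/TraceEnd and the count value here are
stand-ins for the ATrace helpers and GC plumbing, not ART code):

  // Minimal standalone sketch of the pattern: fold the reference count into
  // the trace section name so the marking time shows up attributed to it.
  #include <cinttypes>
  #include <cstdint>
  #include <cstdio>

  static bool TraceEnabled() { return true; }
  static void TraceBegin(const char* name) { printf("begin: %s\n", name); }
  static void TraceEnd() { printf("end\n"); }

  static void ProcessMarkStack() { /* marking work would happen here */ }

  int main() {
    uint32_t non_null_refs = 17;  // e.g. the value ForwardSoftReferences() would return
    if (TraceEnabled()) {
      char buf[80];
      snprintf(buf, sizeof(buf), "Marking for %" PRIu32 " SoftReferences", non_null_refs);
      TraceBegin(buf);  // section name carries the SoftReference count
      ProcessMarkStack();
      TraceEnd();
    } else {
      ProcessMarkStack();
    }
    return 0;
  }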

Add trace information, and possibly a log entry, when we block
dereferencing a WeakReference or the like.

Do the same for JNI WeakGlobals, with some code restructuring
to enable that.
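
In both cases the blocked-access reporting has roughly this shape
(a standalone sketch; std::mutex / std::condition_variable and the
globals are stand-ins for ART's locks and state, while the trace tag,
warning text, and 10 ms threshold mirror the patch):

  #include <chrono>
  #include <condition_variable>
  #include <cstdio>
  #include <mutex>

  static void TraceBegin(const char* name) { printf("begin: %s\n", name); }
  static void TraceEnd() { printf("end\n"); }

  static std::mutex gLock;
  static std::condition_variable gCond;
  static bool gWeakRefAccessEnabled = true;  // flipped by a hypothetical GC thread

  void WaitForWeakRefAccess() {
    std::unique_lock<std::mutex> lock(gLock);
    bool started_trace = false;
    std::chrono::steady_clock::time_point start;
    while (!gWeakRefAccessEnabled) {
      if (!started_trace) {
        // Open the trace section only the first time we actually block.
        TraceBegin("GetReferent blocked");
        started_trace = true;
        start = std::chrono::steady_clock::now();
      }
      gCond.wait(lock);
    }
    if (started_trace) {
      TraceEnd();
      auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(
          std::chrono::steady_clock::now() - start).count();
      if (millis > 10) {  // long enough to risk dropped frames
        fprintf(stderr, "Weak pointer dereference blocked for %lld milliseconds.\n",
                static_cast<long long>(millis));
      }
    }
  }

  int main() {
    WaitForWeakRefAccess();  // returns immediately here since access is enabled
    return 0;
  }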

Delete one of the two nested and almost entirely redundant
ProcessReferences ATrace tags, thus reducing the space needed
to display HeapTaskDaemon back to what it was.

Bug: 189738006
Test: Boot sc-dev and look at trace
Change-Id: I198db632d957bcb9353ab945cedc92aa733963f0
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 538d7be..a850da7 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1613,7 +1613,7 @@
   }
 
   {
-    TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings());
+    TimingLogger::ScopedTiming split7("Process mark stacks and References", GetTimings());
     // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The
     // primary reasons are the fact that we need to use a checkpoint to process thread-local mark
     // stacks, but after we disable weak refs accesses, we can't use a checkpoint due to a deadlock
@@ -3806,7 +3806,6 @@
 }
 
 void ConcurrentCopying::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedTiming split("ProcessReferences", GetTimings());
   // We don't really need to lock the heap bitmap lock as we use CAS to mark in bitmaps.
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 2e74edf..e34d140 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -20,6 +20,7 @@
 #include "base/mutex.h"
 #include "base/time_utils.h"
 #include "base/utils.h"
+#include "base/systrace.h"
 #include "class_root-inl.h"
 #include "collector/garbage_collector.h"
 #include "jni/java_vm_ext.h"
@@ -99,6 +100,17 @@
       return referent;
     }
   }
+  bool started_trace = false;
+  uint64_t start_millis;  // Only meaningful while started_trace is true.
+  auto finish_trace = [](uint64_t start_millis) {
+    ATraceEnd();
+    uint64_t millis = MilliTime() - start_millis;
+    static constexpr uint64_t kReportMillis = 10;  // Long enough to risk dropped frames.
+    if (millis > kReportMillis) {
+      LOG(WARNING) << "Weak pointer dereference blocked for " << millis << " milliseconds.";
+    }
+  };
+
   MutexLock mu(self, *Locks::reference_processor_lock_);
   while ((!kUseReadBarrier && SlowPathEnabled()) ||
          (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
@@ -106,6 +118,9 @@
     // If the referent became cleared, return it. Don't need barrier since thread roots can't get
     // updated until after we leave the function due to holding the mutator lock.
     if (referent == nullptr) {
+      if (started_trace) {
+        finish_trace(start_millis);
+      }
       return nullptr;
     }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
@@ -125,6 +140,9 @@
         // Non null means that it is marked.
         if (!preserving_references_ ||
            (LIKELY(!reference->IsFinalizerReferenceInstance()) && reference->IsUnprocessed())) {
+          if (started_trace) {
+            finish_trace(start_millis);
+          }
           return forwarded_ref;
         }
       }
@@ -132,8 +150,16 @@
     // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
     // presence of threads blocking for weak ref access.
     self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_);
+    if (!started_trace) {
+      ATraceBegin("GetReferent blocked");
+      started_trace = true;
+      start_millis = MilliTime();
+    }
     condition_.WaitHoldingLocks(self);
   }
+  if (started_trace) {
+    finish_trace(start_millis);
+  }
   return reference->GetReferent();
 }
 
@@ -183,8 +209,17 @@
     }
     // TODO: Add smarter logic for preserving soft references. The behavior should be a conditional
     // mark if the SoftReference is supposed to be preserved.
-    soft_reference_queue_.ForwardSoftReferences(collector);
-    collector->ProcessMarkStack();
+    uint32_t non_null_refs = soft_reference_queue_.ForwardSoftReferences(collector);
+    if (ATraceEnabled()) {
+      static constexpr size_t kBufSize = 80;
+      char buf[kBufSize];
+      snprintf(buf, kBufSize, "Marking for %" PRIu32 " SoftReferences", non_null_refs);
+      ATraceBegin(buf);
+      collector->ProcessMarkStack();
+      ATraceEnd();
+    } else {
+      collector->ProcessMarkStack();
+    }
     if (concurrent) {
       StopPreservingReferences(self);
     }
@@ -199,8 +234,19 @@
       StartPreservingReferences(self);
     }
     // Preserve all white objects with finalize methods and schedule them for finalization.
-    finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, collector);
-    collector->ProcessMarkStack();
+    FinalizerStats finalizer_stats =
+        finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, collector);
+    if (ATraceEnabled()) {
+      static constexpr size_t kBufSize = 80;
+      char buf[kBufSize];
+      snprintf(buf, kBufSize, "Marking from %" PRIu32 " / %" PRIu32 " finalizers",
+               finalizer_stats.num_enqueued_, finalizer_stats.num_refs_);
+      ATraceBegin(buf);
+      collector->ProcessMarkStack();
+      ATraceEnd();
+    } else {
+      collector->ProcessMarkStack();
+    }
     if (concurrent) {
       StopPreservingReferences(self);
     }
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 1c751e1..1e3cf93 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -152,10 +152,12 @@
   }
 }
 
-void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+FinalizerStats ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
                                                 collector::GarbageCollector* collector) {
+  uint32_t num_refs(0), num_enqueued(0);
   while (!IsEmpty()) {
     ObjPtr<mirror::FinalizerReference> ref = DequeuePendingReference()->AsFinalizerReference();
+    ++num_refs;
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     // do_atomic_update is false because this happens during the reference processing phase where
     // Reference.clear() would block.
@@ -170,17 +172,20 @@
         ref->ClearReferent<false>();
       }
       cleared_references->EnqueueReference(ref);
+      ++num_enqueued;
     }
     // Delay disabling the read barrier until here so that the ClearReferent call above in
     // transaction mode will trigger the read barrier.
     DisableReadBarrierForReference(ref->AsReference());
   }
+  return FinalizerStats(num_refs, num_enqueued);
 }
 
-void ReferenceQueue::ForwardSoftReferences(MarkObjectVisitor* visitor) {
+uint32_t ReferenceQueue::ForwardSoftReferences(MarkObjectVisitor* visitor) {
   if (UNLIKELY(IsEmpty())) {
-    return;
+    return 0;
   }
+  uint32_t num_refs(0);
   const ObjPtr<mirror::Reference> head = list_;
   ObjPtr<mirror::Reference> ref = head;
   do {
@@ -189,9 +194,11 @@
       // do_atomic_update is false because mutators can't access the referent due to the weak ref
       // access blocking.
       visitor->MarkHeapReference(referent_addr, /*do_atomic_update=*/ false);
+      ++num_refs;
     }
     ref = ref->GetPendingNext();
   } while (LIKELY(ref != head));
+  return num_refs;
 }
 
 void ReferenceQueue::UpdateRoots(IsMarkedVisitor* visitor) {
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 90f0be7..06243c4 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -49,6 +49,13 @@
 
 class Heap;
 
+struct FinalizerStats {
+  FinalizerStats(size_t num_refs, size_t num_enqueued)
+      : num_refs_(num_refs), num_enqueued_(num_enqueued) {}
+  const uint32_t num_refs_;
+  const uint32_t num_enqueued_;
+};
+
 // Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
 // appropriate java.lang.ref.ReferenceQueue. The linked list is maintained as an unordered,
 // circular, and singly-linked list using the pendingNext fields of the java.lang.ref.Reference
@@ -79,14 +86,15 @@
 
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
   // the zombie field, and the referent field is cleared.
-  void EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+  FinalizerStats EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
                                   collector::GarbageCollector* collector)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void ForwardSoftReferences(MarkObjectVisitor* visitor)
+  // Returns the number of non-null soft references.
+  uint32_t ForwardSoftReferences(MarkObjectVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Unlink the reference list clearing references objects with white referents. Cleared references
diff --git a/runtime/jni/java_vm_ext.cc b/runtime/jni/java_vm_ext.cc
index e085cd9..b96c358 100644
--- a/runtime/jni/java_vm_ext.cc
+++ b/runtime/jni/java_vm_ext.cc
@@ -679,6 +679,19 @@
   return reinterpret_cast<jobject>(ref);
 }
 
+void JavaVMExt::WaitForWeakGlobalsAccess(Thread* self) {
+  if (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    ATraceBegin("Blocking on WeakGlobal access");
+    do {
+      // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+      // presence of threads blocking for weak ref access.
+      self->CheckEmptyCheckpointFromWeakRefAccess(Locks::jni_weak_globals_lock_);
+      weak_globals_add_condition_.WaitHoldingLocks(self);
+    } while (!MayAccessWeakGlobals(self));
+    ATraceEnd();
+  }
+}
+
 jweak JavaVMExt::AddWeakGlobalRef(Thread* self, ObjPtr<mirror::Object> obj) {
   if (obj == nullptr) {
     return nullptr;
@@ -687,11 +700,8 @@
   // CMS needs this to block for concurrent reference processing because an object allocated during
   // the GC won't be marked and concurrent reference processing would incorrectly clear the JNI weak
   // ref. But CC (kUseReadBarrier == true) doesn't because of the to-space invariant.
-  while (!kUseReadBarrier && UNLIKELY(!MayAccessWeakGlobals(self))) {
-    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
-    // presence of threads blocking for weak ref access.
-    self->CheckEmptyCheckpointFromWeakRefAccess(Locks::jni_weak_globals_lock_);
-    weak_globals_add_condition_.WaitHoldingLocks(self);
+  if (!kUseReadBarrier) {
+    WaitForWeakGlobalsAccess(self);
   }
   std::string error_msg;
   IndirectRef ref = weak_globals_.Add(kIRTFirstSegment, obj, &error_msg);
@@ -829,12 +839,7 @@
   if (kDebugLocking) {
     Locks::jni_weak_globals_lock_->AssertHeld(self);
   }
-  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
-    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
-    // presence of threads blocking for weak ref access.
-    self->CheckEmptyCheckpointFromWeakRefAccess(Locks::jni_weak_globals_lock_);
-    weak_globals_add_condition_.WaitHoldingLocks(self);
-  }
+  WaitForWeakGlobalsAccess(self);
   return weak_globals_.Get(ref);
 }
 
@@ -854,12 +859,7 @@
 bool JavaVMExt::IsWeakGlobalCleared(Thread* self, IndirectRef ref) {
   DCHECK_EQ(IndirectReferenceTable::GetIndirectRefKind(ref), kWeakGlobal);
   MutexLock mu(self, *Locks::jni_weak_globals_lock_);
-  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
-    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
-    // presence of threads blocking for weak ref access.
-    self->CheckEmptyCheckpointFromWeakRefAccess(Locks::jni_weak_globals_lock_);
-    weak_globals_add_condition_.WaitHoldingLocks(self);
-  }
+  WaitForWeakGlobalsAccess(self);
   // When just checking a weak ref has been cleared, avoid triggering the read barrier in decode
   // (DecodeWeakGlobal) so that we won't accidentally mark the object alive. Since the cleared
   // sentinel is a non-moving object, we can compare the ref to it without the read barrier and
diff --git a/runtime/jni/java_vm_ext.h b/runtime/jni/java_vm_ext.h
index 015f85c..ef283aa 100644
--- a/runtime/jni/java_vm_ext.h
+++ b/runtime/jni/java_vm_ext.h
@@ -222,6 +222,10 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::jni_weak_globals_lock_);
 
+  void WaitForWeakGlobalsAccess(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::jni_weak_globals_lock_);
+
   void CheckGlobalRefAllocationTracking();
 
   Runtime* const runtime_;