Ensure that ConcurrentGC always increments GC num

ConcurrentGC() could end up waiting for an operation like TrimSpaces()
that does not actually increment gcs_completed_. It would then
erroneously think it was done, leaving gcs_requested_ >
gcs_completed_ with no task running to perform the requested GCs,
while further requests were ignored until the next explicit or
heap-overflow GC.
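
A minimal, standalone sketch of the wrap-safe counter comparison the
heap relies on (GCNumberLt in the diff below) to decide whether a
requested GC is still owed; the broken state above is one where this
comparison stays true, yet no task is pending to resolve it:

  #include <cstdint>

  // gc_num1 < gc_num2, modulo 2^32, assuming the two counters never
  // drift far apart; unsigned subtraction handles wrap-around.
  static bool GcNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
    uint32_t difference = gc_num2 - gc_num1;
    bool completed_more_than_requested = difference > 0x80000000;
    return difference > 0 && !completed_more_than_requested;
  }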

Make ConcurrentGC() actually perform a GC unless the GC number has
already been incremented. Add a CHECK in ConcurrentGCTask::Run that
can catch this failure. (Confirmed effective: it did catch the bug
before the fix was added.)
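
A self-contained sketch of the control flow this gives ConcurrentGC()
(simplified stand-ins, not the heap.cc code itself): escalate through
the GC plan until some collection actually increments the
completed-GC counter, or somebody else gets there first:

  #include <atomic>
  #include <cstdint>
  #include <vector>

  enum GcType { kGcTypeNone, kGcTypeSticky, kGcTypePartial, kGcTypeFull };

  static std::atomic<uint32_t> gcs_completed{0};

  static bool GcNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
    uint32_t difference = gc_num2 - gc_num1;
    return difference > 0 && !(difference > 0x80000000);
  }

  // Stand-in for CollectGarbageInternal(): a partial GC can "fail",
  // returning kGcTypeNone without bumping the counter, e.g. when
  // there is no zygote space.
  static GcType CollectGarbage(GcType type, bool has_zygote_space) {
    if (type == kGcTypePartial && !has_zygote_space) {
      return kGcTypeNone;  // gcs_completed is NOT incremented.
    }
    gcs_completed.fetch_add(1, std::memory_order_relaxed);
    return type;
  }

  // Fixed behaviour: do not give up while the requested GC number is
  // still ahead of gcs_completed and stronger GC types remain to try.
  static void ConcurrentGcSketch(uint32_t requested_gc_num,
                                 bool has_zygote_space) {
    const std::vector<GcType> gc_plan =
        {kGcTypeSticky, kGcTypePartial, kGcTypeFull};
    GcType next_gc_type = kGcTypePartial;
    if (!GcNumberLt(gcs_completed.load(), requested_gc_num)) {
      return;  // Someone already completed the GC we were asked for.
    }
    if (CollectGarbage(next_gc_type, has_zygote_space) == kGcTypeNone) {
      for (GcType gc_type : gc_plan) {
        if (!GcNumberLt(gcs_completed.load(), requested_gc_num)) {
          break;  // Somebody did it for us.
        }
        if (gc_type > next_gc_type &&
            CollectGarbage(gc_type, has_zygote_space) != kGcTypeNone) {
          break;  // Escalated to a type that did bump the counter.
        }
      }
    }
  }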

Have RequestConcurrentGC() return a bool indicating whether a GC
past the observed GC number is now guaranteed to be pending. This
makes another CHECK possible, and should eventually allow us to
again sleep() until a GC starts.
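
A self-contained model of the new return value's meaning
(hypothetical names, simplified from the RequestConcurrentGC() change
in the diff): true iff a GC whose number exceeds observed_gc_num is
guaranteed to be pending, either because this call enqueued the task
or because a racing requester already advanced the counter and
enqueued its own:

  #include <atomic>
  #include <cstdint>

  static std::atomic<uint32_t> max_gc_requested{0};

  static bool GcNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
    uint32_t difference = gc_num2 - gc_num1;
    return difference > 0 && !(difference > 0x80000000);
  }

  // enqueue_task stands in for adding a ConcurrentGCTask to the task
  // processor; that task is what eventually increments gcs_completed.
  static bool RequestConcurrentGcSketch(uint32_t observed_gc_num,
                                        bool can_add_heap_task,
                                        void (*enqueue_task)(uint32_t)) {
    uint32_t max_requested = max_gc_requested.load(std::memory_order_relaxed);
    if (GcNumberLt(observed_gc_num, max_requested)) {
      return true;  // Someone already requested a GC past observed_gc_num.
    }
    if (!can_add_heap_task) {
      return false;  // Shutting down, etc.: nothing was or will be queued.
    }
    if (max_gc_requested.compare_exchange_strong(
            max_requested, observed_gc_num + 1, std::memory_order_relaxed)) {
      enqueue_task(observed_gc_num + 1);
    }
    // Either our CAS succeeded and we queued the task, or a racing
    // requester advanced max_gc_requested and queued its own.
    return true;
  }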

Bug: 186592536
Bug: 189150802

Test: Build and boot AOSP
Change-Id: Ib11734a9c87b9f9e19c5a3557eac9024f84cadf3
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 93a594e..6f396fe 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -377,7 +377,7 @@
           min_interval_homogeneous_space_compaction_by_oom),
       last_time_homogeneous_space_compaction_by_oom_(NanoTime()),
       gcs_completed_(0u),
-      gcs_requested_(0u),
+      max_gc_requested_(0u),
       pending_collector_transition_(nullptr),
       pending_heap_trim_(nullptr),
       use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom),
@@ -2575,8 +2575,9 @@
   // other things. It seems risky to trigger GCs as a result of such changes.
 }
 
-static inline bool GCNumberLt(uint32_t gcs_completed, uint32_t gcs_requested) {
-  uint32_t difference = gcs_requested - gcs_completed;
+static inline bool GCNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
+  // Unsigned comparison, assuming a non-huge difference, but dealing correctly with wrapping.
+  uint32_t difference = gc_num2 - gc_num1;
   bool completed_more_than_requested = difference > 0x80000000;
   return difference > 0 && !completed_more_than_requested;
 }
@@ -2592,6 +2593,7 @@
   switch (gc_type) {
     case collector::kGcTypePartial: {
       if (!HasZygoteSpace()) {
+        // Do not increment gcs_completed_. We should retry with kGcTypeFull.
         return collector::kGcTypeNone;
       }
       break;
@@ -3727,8 +3729,11 @@
   ConcurrentGCTask(uint64_t target_time, GcCause cause, bool force_full, uint32_t gc_num)
       : HeapTask(target_time), cause_(cause), force_full_(force_full), my_gc_num_(gc_num) {}
   void Run(Thread* self) override {
-    gc::Heap* heap = Runtime::Current()->GetHeap();
+    Runtime* runtime = Runtime::Current();
+    gc::Heap* heap = runtime->GetHeap();
+    DCHECK(GCNumberLt(my_gc_num_, heap->GetCurrentGcNum() + 2));  // <= current_gc_num + 1
     heap->ConcurrentGC(self, cause_, force_full_, my_gc_num_);
+    CHECK(!GCNumberLt(heap->GetCurrentGcNum(), my_gc_num_) || runtime->IsShuttingDown(self));
   }
 
  private:
@@ -3743,43 +3748,55 @@
       !self->IsHandlingStackOverflow();
 }
 
-void Heap::RequestConcurrentGC(Thread* self,
+bool Heap::RequestConcurrentGC(Thread* self,
                                GcCause cause,
                                bool force_full,
                                uint32_t observed_gc_num) {
-  uint32_t gcs_requested = gcs_requested_.load(std::memory_order_relaxed);
-  if (!GCNumberLt(observed_gc_num, gcs_requested)) {
-    // Nobody beat us to requesting the next gc after observed_gc_num.
-    if (CanAddHeapTask(self)
-        && gcs_requested_.CompareAndSetStrongRelaxed(gcs_requested, observed_gc_num + 1)) {
-      task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
-                                                          cause,
-                                                          force_full,
-                                                          observed_gc_num + 1));
+  uint32_t max_gc_requested = max_gc_requested_.load(std::memory_order_relaxed);
+  if (!GCNumberLt(observed_gc_num, max_gc_requested)) {
+    // observed_gc_num >= max_gc_requested: Nobody beat us to requesting the next gc.
+    if (CanAddHeapTask(self)) {
+      // Since observed_gc_num >= max_gc_requested, this increases max_gc_requested_, if successful.
+      if (max_gc_requested_.CompareAndSetStrongRelaxed(max_gc_requested, observed_gc_num + 1)) {
+        task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
+                                                            cause,
+                                                            force_full,
+                                                            observed_gc_num + 1));
+      }
+      DCHECK(GCNumberLt(observed_gc_num, max_gc_requested_.load(std::memory_order_relaxed)));
+      // If we increased max_gc_requested_, then we added a task that will eventually cause
+      // gcs_completed_ to be incremented (to at least observed_gc_num + 1).
+      // If the CAS failed, somebody else did.
+      return true;
     }
+    return false;
   }
+  return true;  // Vacuously.
 }
 
 void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t requested_gc_num) {
   if (!Runtime::Current()->IsShuttingDown(self)) {
-    // Wait for any GCs currently running to finish.
-    if (WaitForGcToComplete(cause, self) == collector::kGcTypeNone) {
-      // If we can't run the GC type we wanted to run, find the next appropriate one and try
-      // that instead. E.g. can't do partial, so do full instead.
+    // Wait for any running GCs to finish. If that incremented the GC number, we're done.
+    WaitForGcToComplete(cause, self);
+    if (GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
       collector::GcType next_gc_type = next_gc_type_;
       // If forcing full and next gc type is sticky, override with a non-sticky type.
       if (force_full && next_gc_type == collector::kGcTypeSticky) {
         next_gc_type = NonStickyGcType();
       }
+      // If we can't run the GC type we wanted to run, find the next appropriate one and try
+      // that instead. E.g. can't do partial, so do full instead.
+      // We must ensure that we run something that ends up incrementing gcs_completed_.
+      // In the kGcTypePartial case, the initial CollectGarbageInternal call may not have that
+      // effect, but the subsequent kGcTypeFull call will.
       if (CollectGarbageInternal(next_gc_type, cause, false, requested_gc_num)
           == collector::kGcTypeNone) {
         for (collector::GcType gc_type : gc_plan_) {
-          // Attempt to run the collector, if we succeed, we are done.
-          uint32_t gcs_completed = GetCurrentGcNum();
-          if (!GCNumberLt(gcs_completed, requested_gc_num)) {
+          if (!GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
             // Somebody did it for us.
             break;
           }
+          // Attempt to run the collector, if we succeed, we are done.
           if (gc_type > next_gc_type &&
               CollectGarbageInternal(gc_type, cause, false, requested_gc_num)
               != collector::kGcTypeNone) {
@@ -3985,14 +4002,19 @@
   float gc_urgency = NativeMemoryOverTarget(current_native_bytes, is_gc_concurrent);
   if (UNLIKELY(gc_urgency >= 1.0)) {
     if (is_gc_concurrent) {
-      RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true, starting_gc_num);
+      bool requested =
+          RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true, starting_gc_num);
       if (gc_urgency > kStopForNativeFactor
           && current_native_bytes > stop_for_native_allocs_) {
         // We're in danger of running out of memory due to rampant native allocation.
         if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
           LOG(INFO) << "Stopping for native allocation, urgency: " << gc_urgency;
         }
-        WaitForGcToComplete(kGcCauseForNativeAlloc, self);
+        if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) == collector::kGcTypeNone) {
+          DCHECK(!requested
+                 || GCNumberLt(starting_gc_num, max_gc_requested_.load(std::memory_order_relaxed)));
+          // TODO: Eventually sleep here again.
+        }
       }
     } else {
       CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false, starting_gc_num + 1);