RegisterNativeAllocation: Avoid a case of double blocking GC.

If multiple threads call RegisterNativeAllocation, causing the blocking
watermark to be exceeded while a background GC is in progress, two
back-to-back blocking GCs are performed even though a single blocking GC
is sufficient. For example:

1. Thread A's RegisterNativeAllocation triggers background GC1.
2. Thread A's RegisterNativeAllocation triggers blocking GC2.
3. Thread A's GC2 waits for GC1 to complete.
4. Thread B's RegisterNativeAllocation sees GC2 in progress and waits for
it to complete before triggering a second blocking GC, GC3.
5. GC1 completes.

Because thread B's RegisterNativeAllocation was called before GC1
completed and GC2 had not yet actually started, thread B can simply wait
for GC2 to start and finish before returning, rather than waiting for
GC2 to start and finish and then running a second blocking GC of its own.

This change fixes the behavior so that only a single blocking GC is
performed in this case.
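
To make the resulting protocol concrete, here is a minimal standalone
sketch of the fixed synchronization, using std::mutex and
std::condition_variable in place of ART's Mutex and ConditionVariable.
BlockingGcCoordinator, RegisterNativeAllocationBlocked, and RunBlockingGc
are hypothetical names used only for illustration; in the real patch the
in-progress flag is set inside CollectGarbageInternal once the blocking
GC actually starts.

  #include <condition_variable>
  #include <cstdint>
  #include <mutex>

  class BlockingGcCoordinator {
   public:
    void RegisterNativeAllocationBlocked() {
      bool run_gc = false;
      {
        std::unique_lock<std::mutex> lock(lock_);
        uint32_t initial_gcs_finished = gcs_finished_;
        if (gc_in_progress_) {
          // A previously assigned blocking GC has already started; wait
          // for it to finish before addressing the exceeded watermark.
          do {
            cond_.wait(lock);
          } while (gcs_finished_ == initial_gcs_finished);
          initial_gcs_finished++;
        }
        // Several threads may have seen the blocking watermark exceeded.
        // Assign exactly one of them to run the blocking GC; the rest
        // wait for that GC instead of queueing a redundant second one.
        if (gcs_finished_ == initial_gcs_finished) {
          if (gc_is_assigned_) {
            do {
              cond_.wait(lock);
            } while (gcs_finished_ == initial_gcs_finished);
          } else {
            gc_is_assigned_ = true;
            run_gc = true;
          }
        }
      }
      if (run_gc) {
        {
          // Modeled inline here; the patch sets this flag in
          // CollectGarbageInternal for kGcCauseForNativeAllocBlocking.
          std::lock_guard<std::mutex> lock(lock_);
          gc_in_progress_ = true;
        }
        RunBlockingGc();  // placeholder for the actual collection
        std::lock_guard<std::mutex> lock(lock_);
        gc_is_assigned_ = false;
        gc_in_progress_ = false;
        gcs_finished_++;
        cond_.notify_all();
      }
    }

   private:
    void RunBlockingGc() {}  // dummy body; stands in for a real GC

    std::mutex lock_;
    std::condition_variable cond_;
    bool gc_is_assigned_ = false;  // a thread was picked to run the next GC
    bool gc_in_progress_ = false;  // the assigned GC has actually started
    uint32_t gcs_finished_ = 0;    // completed blocking GCs, for wait loops
  };

In this model, a thread that arrives while a GC is merely assigned (step 4
above) waits once for that GC to finish and returns, while a thread that
arrives after the GC has started waits for it and then competes to run
the next one. That is exactly the distinction the new
native_blocking_gc_is_assigned_ flag makes possible.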

Bug: 36851903
Test: 004-NativeAllocations run test.
Change-Id: I1e178b9ee7bb68703bdc9a09b1041a982de8b2ce
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index a450a75..ef99673 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -299,7 +299,7 @@
   objects_moved_.StoreRelaxed(0);
   GcCause gc_cause = GetCurrentIteration()->GetGcCause();
   if (gc_cause == kGcCauseExplicit ||
-      gc_cause == kGcCauseForNativeAlloc ||
+      gc_cause == kGcCauseForNativeAllocBlocking ||
       gc_cause == kGcCauseCollectorTransition ||
       GetCurrentIteration()->GetClearSoftReferences()) {
     force_evacuate_all_ = true;
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 41e6051..fae8b2b 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -192,7 +192,7 @@
   RevokeAllThreadLocalBuffers();
   if (generational_) {
     if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
-        GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAlloc ||
+        GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAllocBlocking ||
         GetCurrentIteration()->GetClearSoftReferences()) {
       // If an explicit, native allocation-triggered, or last attempt
       // collection, collect the whole heap.
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index c35ec7c..2bbc86e 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -29,7 +29,7 @@
     case kGcCauseBackground: return "Background";
     case kGcCauseExplicit: return "Explicit";
     case kGcCauseForNativeAlloc: return "NativeAlloc";
-    case kGcCauseForNativeAllocBackground: return "NativeAllocBackground";
+    case kGcCauseForNativeAllocBlocking: return "NativeAllocBlocking";
     case kGcCauseCollectorTransition: return "CollectorTransition";
     case kGcCauseDisableMovingGc: return "DisableMovingGc";
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 41c8943..b8cf3c4 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -31,10 +31,12 @@
   kGcCauseBackground,
   // An explicit System.gc() call.
   kGcCauseExplicit,
-  // GC triggered for a native allocation.
+  // GC triggered for a native allocation when NativeAllocationGcWatermark is exceeded.
+  // (This may be a blocking GC depending on whether we run a non-concurrent collector).
   kGcCauseForNativeAlloc,
-  // Background GC triggered for a native allocation.
-  kGcCauseForNativeAllocBackground,
+  // GC triggered for a native allocation when NativeAllocationBlockingGcWatermark is exceeded.
+  // (This is always a blocking GC).
+  kGcCauseForNativeAllocBlocking,
   // GC triggered for a collector transition.
   kGcCauseCollectorTransition,
   // Not a real GC cause, used when we disable moving GC (currently for GetPrimitiveArrayCritical).
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 298336a..668fb4b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -563,6 +563,7 @@
   native_blocking_gc_lock_ = new Mutex("Native blocking GC lock");
   native_blocking_gc_cond_.reset(new ConditionVariable("Native blocking GC condition variable",
                                                        *native_blocking_gc_lock_));
+  native_blocking_gc_is_assigned_ = false;
   native_blocking_gc_in_progress_ = false;
   native_blocking_gcs_finished_ = 0;
 
@@ -2695,6 +2696,10 @@
     // old_native_bytes_allocated_ now that GC has been triggered, resetting
     // new_native_bytes_allocated_ to zero in the process.
     old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
+    if (gc_cause == kGcCauseForNativeAllocBlocking) {
+      MutexLock mu(self, *native_blocking_gc_lock_);
+      native_blocking_gc_in_progress_ = true;
+    }
   }
 
   DCHECK_LT(gc_type, collector::kGcTypeMax);
@@ -3526,6 +3531,7 @@
     // it results in log spam. kGcCauseExplicit is already logged in LogGC, so avoid it here too.
     if (cause == kGcCauseForAlloc ||
         cause == kGcCauseForNativeAlloc ||
+        cause == kGcCauseForNativeAllocBlocking ||
         cause == kGcCauseDisableMovingGc) {
       VLOG(gc) << "Starting a blocking GC " << cause;
     }
@@ -3927,33 +3933,36 @@
         // finish before addressing the fact that we exceeded the blocking
         // watermark again.
         do {
+          ScopedTrace trace("RegisterNativeAllocation: Wait For Prior Blocking GC Completion");
           native_blocking_gc_cond_->Wait(self);
         } while (native_blocking_gcs_finished_ == initial_gcs_finished);
         initial_gcs_finished++;
       }
 
       // It's possible multiple threads have seen that we exceeded the
-      // blocking watermark. Ensure that only one of those threads runs the
-      // blocking GC. The rest of the threads should instead wait for the
-      // blocking GC to complete.
+      // blocking watermark. Ensure that only one of those threads is assigned
+      // to run the blocking GC. The rest of the threads should instead wait
+      // for the blocking GC to complete.
       if (native_blocking_gcs_finished_ == initial_gcs_finished) {
-        if (native_blocking_gc_in_progress_) {
+        if (native_blocking_gc_is_assigned_) {
           do {
+            ScopedTrace trace("RegisterNativeAllocation: Wait For Blocking GC Completion");
             native_blocking_gc_cond_->Wait(self);
           } while (native_blocking_gcs_finished_ == initial_gcs_finished);
         } else {
-          native_blocking_gc_in_progress_ = true;
+          native_blocking_gc_is_assigned_ = true;
           run_gc = true;
         }
       }
     }
 
     if (run_gc) {
-      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
+      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAllocBlocking, false);
       RunFinalization(env, kNativeAllocationFinalizeTimeout);
       CHECK(!env->ExceptionCheck());
 
       MutexLock mu(self, *native_blocking_gc_lock_);
+      native_blocking_gc_is_assigned_ = false;
       native_blocking_gc_in_progress_ = false;
       native_blocking_gcs_finished_++;
       native_blocking_gc_cond_->Broadcast(self);
@@ -3962,7 +3971,7 @@
     // Trigger another GC because there have been enough native bytes
     // allocated since the last GC.
     if (IsGcConcurrent()) {
-      RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAllocBackground, /*force_full*/true);
+      RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAlloc, /*force_full*/true);
     } else {
       CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
     }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index aa123d8..7287178 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -1237,10 +1237,20 @@
   // old_native_bytes_allocated_ and new_native_bytes_allocated_.
   Atomic<size_t> old_native_bytes_allocated_;
 
-  // Used for synchronization of blocking GCs triggered by
-  // RegisterNativeAllocation.
+  // Used for synchronization when multiple threads call into
+  // RegisterNativeAllocation and require blocking GC.
+  // * If a previous blocking GC is in progress, all threads will wait for
+  // that GC to complete, then wait for one of the threads to complete another
+  // blocking GC.
+  // * If a blocking GC is assigned but not in progress, a thread has been
+  // assigned to run a blocking GC but has not started yet. Threads will wait
+  // for the assigned blocking GC to complete.
+  // * If a blocking GC is neither assigned nor in progress, the first thread
+  // will run a blocking GC and signal to other threads that a blocking GC has
+  // assigned.
   Mutex* native_blocking_gc_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::unique_ptr<ConditionVariable> native_blocking_gc_cond_ GUARDED_BY(native_blocking_gc_lock_);
+  bool native_blocking_gc_is_assigned_ GUARDED_BY(native_blocking_gc_lock_);
   bool native_blocking_gc_in_progress_ GUARDED_BY(native_blocking_gc_lock_);
   uint32_t native_blocking_gcs_finished_ GUARDED_BY(native_blocking_gc_lock_);
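
For reference, the two flags guarded by native_blocking_gc_lock_ admit
three reachable states. The sketch below states the intended invariant as
a hypothetical, self-contained debug helper; it is not part of the patch.

  #include <cassert>

  // States of (is_assigned, in_progress), both guarded by the lock:
  //   (false, false): no blocking GC pending; the next thread to exceed
  //                   the blocking watermark assigns itself.
  //   (true,  false): a thread is assigned but has not started collecting;
  //                   arriving threads are covered by the upcoming GC and
  //                   only wait for its completion.
  //   (true,  true):  the assigned GC is collecting; threads arriving now
  //                   wait for it and then trigger another blocking GC.
  // (false, true) should be unreachable, because the in-progress flag is
  // set only on behalf of the thread that was already assigned.
  void CheckNativeBlockingGcInvariant(bool is_assigned, bool in_progress) {
    assert(is_assigned || !in_progress);
  }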