Diffstat (limited to 'runtime/gc/heap.cc')
| -rw-r--r-- | runtime/gc/heap.cc | 203 |
1 file changed, 104 insertions, 99 deletions
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 268cca0cfd..051f3f7b00 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -78,6 +78,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "handle_scope-inl.h"
 #include "thread_list.h"
+#include "verify_object-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -127,8 +128,6 @@ static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
 // Dump the rosalloc stats on SIGQUIT.
 static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
 
-static constexpr size_t kNativeAllocationHistogramBuckets = 16;
-
 // Extra added to the heap growth multiplier. Used to adjust the GC ergonomics for the read barrier
 // config.
 static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
@@ -194,18 +193,12 @@ Heap::Heap(size_t initial_size,
       capacity_(capacity),
       growth_limit_(growth_limit),
       max_allowed_footprint_(initial_size),
-      native_footprint_gc_watermark_(initial_size),
-      native_need_to_run_finalization_(false),
       concurrent_start_bytes_(std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
-      native_bytes_allocated_(0),
-      native_histogram_lock_("Native allocation lock"),
-      native_allocation_histogram_("Native allocation sizes",
-                                   1U,
-                                   kNativeAllocationHistogramBuckets),
-      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
+      new_native_bytes_allocated_(0),
+      old_native_bytes_allocated_(0),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -293,8 +286,13 @@ Heap::Heap(size_t initial_size,
   if (foreground_collector_type_ == kCollectorTypeCC) {
     // Need to use a low address so that we can allocate a contiguous
     // 2 * Xmx space when there's no image (dex2oat for target).
+#if defined(__LP64__)
     CHECK_GE(300 * MB, non_moving_space_capacity);
     requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
+#else
+    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(0x20000000);
+#endif
   }
 
   // Load image space(s).
@@ -369,7 +367,12 @@
                              &error_str));
     CHECK(non_moving_space_mem_map != nullptr) << error_str;
     // Try to reserve virtual memory at a lower address if we have a separate non moving space.
+#if defined(__LP64__)
     request_begin = reinterpret_cast<uint8_t*>(300 * MB);
+#else
+    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+    request_begin = reinterpret_cast<uint8_t*>(0x20000000) + non_moving_space_capacity;
+#endif
   }
   // Attempt to create 2 mem maps at or after the requested begin.
   if (foreground_collector_type_ != kCollectorTypeCC) {
@@ -534,6 +537,12 @@
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
+  native_blocking_gc_lock_ = new Mutex("Native blocking GC lock");
+  native_blocking_gc_cond_.reset(new ConditionVariable("Native blocking GC condition variable",
+                                                       *native_blocking_gc_lock_));
+  native_blocking_gc_in_progress_ = false;
+  native_blocking_gcs_finished_ = 0;
+
   thread_flip_lock_ = new Mutex("GC thread flip lock");
   thread_flip_cond_.reset(new ConditionVariable("GC thread flip condition variable",
                                                 *thread_flip_lock_));
@@ -1101,19 +1110,9 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) {
     rosalloc_space_->DumpStats(os);
   }
 
-  {
-    MutexLock mu(Thread::Current(), native_histogram_lock_);
-    if (native_allocation_histogram_.SampleSize() > 0u) {
-      os << "Histogram of native allocation ";
-      native_allocation_histogram_.DumpBins(os);
-      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
-    }
-    if (native_free_histogram_.SampleSize() > 0u) {
-      os << "Histogram of native free ";
-      native_free_histogram_.DumpBins(os);
-      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
-    }
-  }
+  os << "Registered native bytes allocated: "
+     << old_native_bytes_allocated_.LoadRelaxed() + new_native_bytes_allocated_.LoadRelaxed()
+     << "\n";
 
   BaseMutex::DumpAll(os);
 }
@@ -1198,6 +1197,7 @@ Heap::~Heap() {
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
+  delete native_blocking_gc_lock_;
   delete thread_flip_lock_;
   delete pending_task_lock_;
   delete backtrace_lock_;
@@ -2645,6 +2645,13 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
   // Approximate heap size.
   ATRACE_INT("Heap size (KB)", bytes_allocated_before_gc / KB);
 
+  if (gc_type == NonStickyGcType()) {
+    // Move all bytes from new_native_bytes_allocated_ to
+    // old_native_bytes_allocated_ now that GC has been triggered, resetting
+    // new_native_bytes_allocated_ to zero in the process.
+    old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
+  }
+
   DCHECK_LT(gc_type, collector::kGcTypeMax);
   DCHECK_NE(gc_type, collector::kGcTypeNone);
 
@@ -3352,7 +3359,7 @@ void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) {
 
 void Heap::PreGcVerification(collector::GarbageCollector* gc) {
   if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_) {
-    collector::GarbageCollector::ScopedPause pause(gc);
+    collector::GarbageCollector::ScopedPause pause(gc, false);
     PreGcVerificationPaused(gc);
   }
 }
@@ -3420,7 +3427,7 @@ void Heap::PostGcVerificationPaused(collector::GarbageCollector* gc) {
 
 void Heap::PostGcVerification(collector::GarbageCollector* gc) {
   if (verify_system_weaks_ || verify_post_gc_rosalloc_ || verify_post_gc_heap_) {
-    collector::GarbageCollector::ScopedPause pause(gc);
+    collector::GarbageCollector::ScopedPause pause(gc, false);
     PostGcVerificationPaused(gc);
   }
 }
@@ -3504,18 +3511,6 @@ bool Heap::IsMovableObject(ObjPtr<mirror::Object> obj) const {
   return false;
 }
 
-void Heap::UpdateMaxNativeFootprint() {
-  size_t native_size = native_bytes_allocated_.LoadRelaxed();
-  // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
-  size_t target_size = native_size / GetTargetHeapUtilization();
-  if (target_size > native_size + max_free_) {
-    target_size = native_size + max_free_;
-  } else if (target_size < native_size + min_free_) {
-    target_size = native_size + min_free_;
-  }
-  native_footprint_gc_watermark_ = std::min(growth_limit_, target_size);
-}
-
 collector::GarbageCollector* Heap::FindCollectorByGcType(collector::GcType gc_type) {
   for (const auto& collector : garbage_collectors_) {
     if (collector->GetCollectorType() == collector_type_ &&
@@ -3543,8 +3538,11 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
   collector::GcType gc_type = collector_ran->GetGcType();
   const double multiplier = HeapGrowthMultiplier();  // Use the multiplier to grow more for
   // foreground.
-  const uint64_t adjusted_min_free = static_cast<uint64_t>(min_free_ * multiplier);
-  const uint64_t adjusted_max_free = static_cast<uint64_t>(max_free_ * multiplier);
+  // Ensure at least 2.5 MB to temporarily fix excessive GC caused by TLAB ergonomics.
+  const uint64_t adjusted_min_free = std::max(static_cast<uint64_t>(min_free_ * multiplier),
+                                              static_cast<uint64_t>(5 * MB / 2));
+  const uint64_t adjusted_max_free = std::max(static_cast<uint64_t>(max_free_ * multiplier),
+                                              static_cast<uint64_t>(5 * MB / 2));
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
     ssize_t delta = bytes_allocated / GetTargetHeapUtilization() - bytes_allocated;
@@ -3552,11 +3550,9 @@
     target_size = bytes_allocated + delta * multiplier;
     target_size = std::min(target_size, bytes_allocated + adjusted_max_free);
     target_size = std::max(target_size, bytes_allocated + adjusted_min_free);
-    native_need_to_run_finalization_ = true;
    next_gc_type_ = collector::kGcTypeSticky;
   } else {
-    collector::GcType non_sticky_gc_type =
-        HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+    collector::GcType non_sticky_gc_type = NonStickyGcType();
     // Find what the next non sticky collector will be.
     collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type);
     // If the throughput of the current sticky GC >= throughput of the non sticky collector, then
@@ -3707,7 +3703,7 @@ void Heap::ConcurrentGC(Thread* self, bool force_full) {
       collector::GcType next_gc_type = next_gc_type_;
       // If forcing full and next gc type is sticky, override with a non-sticky type.
       if (force_full && next_gc_type == collector::kGcTypeSticky) {
-        next_gc_type = HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+        next_gc_type = NonStickyGcType();
       }
       if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) ==
           collector::kGcTypeNone) {
@@ -3864,70 +3860,79 @@ void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) {
 }
 
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
-  Thread* self = ThreadForEnv(env);
-  {
-    MutexLock mu(self, native_histogram_lock_);
-    native_allocation_histogram_.AddValue(bytes);
-  }
-  if (native_need_to_run_finalization_) {
-    RunFinalization(env, kNativeAllocationFinalizeTimeout);
-    UpdateMaxNativeFootprint();
-    native_need_to_run_finalization_ = false;
-  }
-  // Total number of native bytes allocated.
-  size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes);
-  new_native_bytes_allocated += bytes;
-  if (new_native_bytes_allocated > native_footprint_gc_watermark_) {
-    collector::GcType gc_type = HasZygoteSpace() ? collector::kGcTypePartial :
-                                                   collector::kGcTypeFull;
-
-    // The second watermark is higher than the gc watermark. If you hit this it means you are
-    // allocating native objects faster than the GC can keep up with.
-    if (new_native_bytes_allocated > growth_limit_) {
-      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
-        // Just finished a GC, attempt to run finalizers.
-        RunFinalization(env, kNativeAllocationFinalizeTimeout);
-        CHECK(!env->ExceptionCheck());
-        // Native bytes allocated may be updated by finalization, refresh it.
-        new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed();
-      }
-      // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-      if (new_native_bytes_allocated > growth_limit_) {
-        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
-        RunFinalization(env, kNativeAllocationFinalizeTimeout);
-        native_need_to_run_finalization_ = false;
-        CHECK(!env->ExceptionCheck());
+  // See the REDESIGN section of go/understanding-register-native-allocation
+  // for an explanation of how RegisterNativeAllocation works.
+  size_t new_value = bytes + new_native_bytes_allocated_.FetchAndAddRelaxed(bytes);
+  if (new_value > NativeAllocationBlockingGcWatermark()) {
+    // Wait for a new GC to finish and finalizers to run, because the
+    // allocation rate is too high.
+    Thread* self = ThreadForEnv(env);
+
+    bool run_gc = false;
+    {
+      MutexLock mu(self, *native_blocking_gc_lock_);
+      uint32_t initial_gcs_finished = native_blocking_gcs_finished_;
+      if (native_blocking_gc_in_progress_) {
+        // A native blocking GC is in progress from the last time the native
+        // allocation blocking GC watermark was exceeded. Wait for that GC to
+        // finish before addressing the fact that we exceeded the blocking
+        // watermark again.
+        do {
+          native_blocking_gc_cond_->Wait(self);
+        } while (native_blocking_gcs_finished_ == initial_gcs_finished);
+        initial_gcs_finished++;
       }
-      // We have just run finalizers, update the native watermark since it is very likely that
-      // finalizers released native managed allocations.
-      UpdateMaxNativeFootprint();
-    } else if (!IsGCRequestPending()) {
-      if (IsGcConcurrent()) {
-        RequestConcurrentGC(self, true);  // Request non-sticky type.
+
+      // It's possible multiple threads have seen that we exceeded the
+      // blocking watermark. Ensure that only one of those threads runs the
+      // blocking GC. The rest of the threads should instead wait for the
+      // blocking GC to complete.
+      if (native_blocking_gc_in_progress_) {
+        do {
+          native_blocking_gc_cond_->Wait(self);
+        } while (native_blocking_gcs_finished_ == initial_gcs_finished);
       } else {
-        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
+        native_blocking_gc_in_progress_ = true;
+        run_gc = true;
       }
     }
+
+    if (run_gc) {
+      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
+      RunFinalization(env, kNativeAllocationFinalizeTimeout);
+      CHECK(!env->ExceptionCheck());
+
+      MutexLock mu(self, *native_blocking_gc_lock_);
+      native_blocking_gc_in_progress_ = false;
+      native_blocking_gcs_finished_++;
+      native_blocking_gc_cond_->Broadcast(self);
+    }
+  } else if (new_value > NativeAllocationGcWatermark() && !IsGCRequestPending()) {
+    // Trigger another GC because there have been enough native bytes
+    // allocated since the last GC.
+    if (IsGcConcurrent()) {
+      RequestConcurrentGC(ThreadForEnv(env), /*force_full*/true);
+    } else {
+      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
+    }
   }
 }
 
-void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
-  size_t expected_size;
-  {
-    MutexLock mu(Thread::Current(), native_histogram_lock_);
-    native_free_histogram_.AddValue(bytes);
-  }
+void Heap::RegisterNativeFree(JNIEnv*, size_t bytes) {
+  // Take the bytes freed out of new_native_bytes_allocated_ first. If
+  // new_native_bytes_allocated_ reaches zero, take the remaining bytes freed
+  // out of old_native_bytes_allocated_ to ensure all freed bytes are
+  // accounted for.
+  size_t allocated;
+  size_t new_freed_bytes;
   do {
-    expected_size = native_bytes_allocated_.LoadRelaxed();
-    if (UNLIKELY(bytes > expected_size)) {
-      ScopedObjectAccess soa(env);
-      env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
-                    StringPrintf("Attempted to free %zd native bytes with only %zd native bytes "
-                                 "registered as allocated", bytes, expected_size).c_str());
-      break;
-    }
-  } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size,
-                                                               expected_size - bytes));
+    allocated = new_native_bytes_allocated_.LoadRelaxed();
+    new_freed_bytes = std::min(allocated, bytes);
+  } while (!new_native_bytes_allocated_.CompareExchangeWeakRelaxed(allocated,
+                                                                   allocated - new_freed_bytes));
+  if (new_freed_bytes < bytes) {
+    old_native_bytes_allocated_.FetchAndSubRelaxed(bytes - new_freed_bytes);
+  }
 }
 
 size_t Heap::GetTotalMemory() const {
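
Note on the new accounting scheme: the sketch below is a standalone illustration, not ART code, of the two-counter bookkeeping this change introduces. It assumes std::atomic in place of ART's Atomic<size_t>, and the names NativeAccounting, RegisterAllocation, RegisterFree, OnGcStart, and TotalRegisteredBytes are illustrative stand-ins for Heap::RegisterNativeAllocation, Heap::RegisterNativeFree, the hand-off in CollectGarbageInternal, and the line DumpGcPerformanceInfo now prints.

#include <algorithm>
#include <atomic>
#include <cstddef>

class NativeAccounting {
 public:
  // Mirrors RegisterNativeAllocation: newly registered bytes accumulate in
  // the "new" counter, which the GC-trigger watermark checks read.
  void RegisterAllocation(size_t bytes) {
    new_bytes_.fetch_add(bytes, std::memory_order_relaxed);
  }

  // Mirrors RegisterNativeFree: drain the "new" counter first with a CAS
  // loop, then charge any remainder to the "old" counter so every freed
  // byte is accounted for.
  void RegisterFree(size_t bytes) {
    size_t allocated = new_bytes_.load(std::memory_order_relaxed);
    size_t new_freed = 0;
    do {
      new_freed = std::min(allocated, bytes);
    } while (!new_bytes_.compare_exchange_weak(allocated, allocated - new_freed,
                                               std::memory_order_relaxed));
    if (new_freed < bytes) {
      old_bytes_.fetch_sub(bytes - new_freed, std::memory_order_relaxed);
    }
  }

  // Mirrors the hand-off at the start of a non-sticky GC: everything
  // registered since the last GC becomes "old" and the "new" counter resets.
  void OnGcStart() {
    old_bytes_.fetch_add(new_bytes_.exchange(0, std::memory_order_relaxed),
                         std::memory_order_relaxed);
  }

  // Corresponds to the value DumpGcPerformanceInfo now reports:
  // the sum of both counters.
  size_t TotalRegisteredBytes() const {
    return old_bytes_.load(std::memory_order_relaxed) +
           new_bytes_.load(std::memory_order_relaxed);
  }

 private:
  std::atomic<size_t> new_bytes_{0};
  std::atomic<size_t> old_bytes_{0};
};

The point of the split is that the watermark checks only consider bytes registered since the last non-sticky GC, while frees remain fully accounted for by falling back to the old counter.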
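Note on the blocking path: when the blocking watermark is exceeded, the new RegisterNativeAllocation uses native_blocking_gc_lock_, native_blocking_gc_cond_, an in-progress flag, and a generation counter so that exactly one thread runs the blocking GC while every other thread that crossed the watermark waits for it to finish. The sketch below shows the same handshake using std::mutex and std::condition_variable instead of ART's Mutex and ConditionVariable; BlockingGcCoordinator, OnWatermarkExceeded, and RunBlockingGc are hypothetical names, and RunBlockingGc abbreviates the CollectGarbageInternal plus RunFinalization sequence.

#include <condition_variable>
#include <cstdint>
#include <mutex>

class BlockingGcCoordinator {
 public:
  void OnWatermarkExceeded() {
    bool run_gc = false;
    {
      std::unique_lock<std::mutex> lock(lock_);
      uint32_t initial_gcs_finished = gcs_finished_;
      if (gc_in_progress_) {
        // A blocking GC from an earlier overflow is still running; wait for
        // it to finish before handling this overflow.
        cond_.wait(lock, [&] { return gcs_finished_ != initial_gcs_finished; });
        initial_gcs_finished++;
      }
      if (gc_in_progress_) {
        // Another thread already claimed the GC for this overflow; wait for
        // the next generation to complete instead of starting a second one.
        cond_.wait(lock, [&] { return gcs_finished_ != initial_gcs_finished; });
      } else {
        gc_in_progress_ = true;
        run_gc = true;  // This thread runs the GC outside the lock.
      }
    }
    if (run_gc) {
      RunBlockingGc();  // Stand-in for the collection and finalizer run.
      std::lock_guard<std::mutex> lock(lock_);
      gc_in_progress_ = false;
      gcs_finished_++;
      cond_.notify_all();
    }
  }

 private:
  void RunBlockingGc() {}  // Stub for the sketch.

  std::mutex lock_;
  std::condition_variable cond_;
  bool gc_in_progress_ = false;
  uint32_t gcs_finished_ = 0;
};

Running the GC outside the lock while waiters key off the generation counter is what lets threads that arrive during an in-progress blocking GC distinguish "the GC I was waiting for finished" from "a newer GC finished", matching the two wait loops in the diff.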