Estimate allocation rate to predict when to request concurrent GC.

The estimate is the number of bytes allocated between GCs divided by the time between GCs.
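
Below is a minimal, standalone sketch of the scheduling arithmetic this change adds in
Heap::GrowForUtilization (the function name and parameters here are simplified for
illustration; the real code uses the Heap member fields shown in the diff). For example,
with an allocation rate of 20 MB/s and a previous GC duration of 50 ms, the concurrent GC
is requested roughly 1 MB (20 MB/s * 0.05 s) before the allowed footprint would be reached.

  #include <algorithm>
  #include <cstdint>

  // Given the measured allocation rate (bytes/second) and the duration of the
  // previous GC, compute the allocation level at which a concurrent GC should
  // be requested so it can finish before the heap reaches its allowed footprint.
  uint64_t ConcurrentStartBytes(uint64_t allocation_rate, uint64_t gc_duration_ns,
                                uint64_t bytes_allocated, uint64_t max_allowed_footprint) {
    const double gc_duration_seconds = gc_duration_ns / 1e9;
    // Bytes expected to be allocated while the concurrent GC runs.
    const uint64_t remaining_bytes =
        static_cast<uint64_t>(allocation_rate * gc_duration_seconds);
    if (remaining_bytes >= max_allowed_footprint) {
      // Allocating faster than a GC can complete: request another GC immediately.
      return bytes_allocated;
    }
    // Leave enough headroom for the GC to finish before the footprint is reached.
    return std::max(max_allowed_footprint - remaining_bytes, bytes_allocated);
  }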

Change-Id: I7c82196fdc19061c99651d6d82fd7fcdb3b3608b
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index d02e0cf..d5231ec 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -290,9 +290,6 @@
     // Unbind the live and mark bitmaps.
     UnBindBitmaps();
   }
-
-  heap_->GrowForUtilization();
-  timings_.AddSplit("GrowForUtilization");
 }
 
 void MarkSweep::SwapBitmaps() {
@@ -1473,6 +1470,12 @@
 
   heap_->PostGcVerification(this);
 
+  heap_->GrowForUtilization(GetDuration());
+  timings_.AddSplit("GrowForUtilization");
+
+  heap_->RequestHeapTrim();
+  timings_.AddSplit("RequestHeapTrim");
+
   // Update the cumulative statistics
   total_time_ += GetDuration();
   total_paused_time_ += std::accumulate(GetPauseTimes().begin(), GetPauseTimes().end(), 0,
diff --git a/src/heap.cc b/src/heap.cc
index 645d402..472031b 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -49,7 +49,7 @@
 
 static const uint64_t kSlowGcThreshold = MsToNs(100);
 static const uint64_t kLongGcPauseThreshold = MsToNs(5);
-static const bool kDumpGcPerformanceOnShutdown = true;
+static const bool kDumpGcPerformanceOnShutdown = false;
 const double Heap::kDefaultTargetUtilization = 0.5;
 
 static bool GenerateImage(const std::string& image_file_name) {
@@ -149,7 +149,8 @@
       max_allowed_footprint_(initial_size),
       concurrent_start_size_(128 * KB),
       concurrent_min_free_(256 * KB),
-      concurrent_start_bytes_(initial_size - concurrent_start_size_),
+      concurrent_start_bytes_(concurrent_gc ? initial_size - concurrent_start_size_ :
+          std::numeric_limits<size_t>::max()),
       sticky_gc_count_(0),
       total_bytes_freed_(0),
       total_objects_freed_(0),
@@ -164,6 +165,7 @@
       min_alloc_space_size_for_sticky_gc_(2 * MB),
       min_remaining_space_for_sticky_gc_(1 * MB),
       last_trim_time_(0),
+      allocation_rate_(0),
       max_allocation_stack_size_(MB),
       reference_referent_offset_(0),
       reference_queue_offset_(0),
@@ -278,9 +280,9 @@
   live_stack_.reset(ObjectStack::Create("dalvik-live-stack",
                                       max_allocation_stack_size_));
 
-  // It's still too early to take a lock because there are no threads yet,
-  // but we can create the heap lock now. We don't create it earlier to
-  // make it clear that you can't use locks during heap initialization.
+  // It's still too early to take a lock because there are no threads yet, but we can create locks
+  // now. We don't create them earlier to make it clear that you can't use locks during heap
+  // initialization.
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
@@ -288,6 +290,9 @@
   // Create the reference queue lock; this is required for parallel object scanning in the GC.
   reference_queue_lock_.reset(new Mutex("reference queue lock"));
 
+  last_gc_time_ = NanoTime();
+  last_gc_size_ = GetBytesAllocated();
+
   // Create our garbage collectors.
   for (size_t i = 0; i < 2; ++i) {
     const bool concurrent = i != 0;
@@ -651,10 +656,7 @@
   // This is safe to do since the GC will never free objects which are neither in the allocation
   // stack or the live bitmap.
   while (!allocation_stack_->AtomicPushBack(obj)) {
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
     CollectGarbageInternal(kGcTypeSticky, kGcCauseForAlloc, false);
-    self->TransitionFromSuspendedToRunnable();
   }
 }
 
@@ -742,13 +744,10 @@
     }
 
     if (run_gc) {
-      self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
-
       // If we actually ran a different type of Gc than requested, we can skip the index forwards.
       GcType gc_type_ran = CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
-      DCHECK(static_cast<size_t>(gc_type_ran) >= i);
+      DCHECK_GE(static_cast<size_t>(gc_type_ran), i);
       i = static_cast<size_t>(gc_type_ran);
-      self->TransitionFromSuspendedToRunnable();
 
       // Did we free sufficient memory for the allocation to succeed?
       ptr = TryToAllocate(self, space, alloc_size, false);
@@ -774,9 +773,7 @@
            << " allocation";
 
   // We don't need a WaitForConcurrentGcToComplete here either.
-  self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
   CollectGarbageInternal(kGcTypeFull, kGcCauseForAlloc, true);
-  self->TransitionFromSuspendedToRunnable();
   return TryToAllocate(self, space, alloc_size, true);
 }
 
@@ -868,8 +865,6 @@
   // last GC will not have necessarily been cleared.
   Thread* self = Thread::Current();
   WaitForConcurrentGcToComplete(self);
-  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
-  // CollectGarbageInternal(have_zygote_space_ ? kGcTypePartial : kGcTypeFull, clear_soft_references);
   CollectGarbageInternal(kGcTypeFull, kGcCauseExplicit, clear_soft_references);
 }
 
@@ -958,8 +953,8 @@
 
 GcType Heap::CollectGarbageInternal(GcType gc_type, GcCause gc_cause, bool clear_soft_references) {
   Thread* self = Thread::Current();
+  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-  DCHECK_EQ(self->GetState(), kWaitingPerformingGc);
 
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
@@ -997,6 +992,16 @@
     sticky_gc_count_ = 0;
   }
 
+  uint64_t gc_start_time = NanoTime();
+  uint64_t gc_start_size = GetBytesAllocated();
+  // Approximate allocation rate in bytes / second.
+  DCHECK_NE(gc_start_time, last_gc_time_);
+  uint64_t ms_delta = NsToMs(gc_start_time - last_gc_time_);
+  if (ms_delta != 0) {
+    allocation_rate_ = (gc_start_size - last_gc_size_) * 1000 / ms_delta;
+    VLOG(heap) << "Allocation rate: " << PrettySize(allocation_rate_) << "/s";
+  }
+
   DCHECK_LT(gc_type, kGcTypeMax);
   DCHECK_NE(gc_type, kGcTypeNone);
   MarkSweep* collector = NULL;
@@ -1015,9 +1020,8 @@
   collector->Run();
   total_objects_freed_ += collector->GetFreedObjects();
   total_bytes_freed_ += collector->GetFreedBytes();
-  RequestHeapTrim();
 
-  uint64_t duration = collector->GetDuration();
+  const size_t duration = collector->GetDuration();
   std::vector<uint64_t> pauses = collector->GetPauseTimes();
   bool was_slow = duration > kSlowGcThreshold ||
       (gc_cause == kGcCauseForAlloc && duration > kLongGcPauseThreshold);
@@ -1474,26 +1478,40 @@
   max_allowed_footprint_ = max_allowed_footprint;
 }
 
-void Heap::GrowForUtilization() {
+void Heap::GrowForUtilization(uint64_t gc_duration) {
   // We know what our utilization is at this moment.
   // This doesn't actually resize any memory. It just lets the heap grow more when necessary.
-  size_t target_size = num_bytes_allocated_ / GetTargetHeapUtilization();
-  if (target_size > num_bytes_allocated_ + max_free_) {
-    target_size = num_bytes_allocated_ + max_free_;
-  } else if (target_size < num_bytes_allocated_ + min_free_) {
-    target_size = num_bytes_allocated_ + min_free_;
-  }
+  const size_t bytes_allocated = GetBytesAllocated();
+  last_gc_size_ = bytes_allocated;
+  last_gc_time_ = NanoTime();
 
-  // Calculate when to perform the next ConcurrentGC.
-  if (GetFreeMemory() < concurrent_min_free_) {
-    // Not enough free memory to perform concurrent GC.
-    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
-  } else {
-    // Start a concurrent Gc when we get close to the target size.
-    concurrent_start_bytes_ = target_size - concurrent_start_size_;
+  size_t target_size = bytes_allocated / GetTargetHeapUtilization();
+  if (target_size > bytes_allocated + max_free_) {
+    target_size = bytes_allocated + max_free_;
+  } else if (target_size < bytes_allocated + min_free_) {
+    target_size = bytes_allocated + min_free_;
   }
 
   SetIdealFootprint(target_size);
+
+  // Calculate when to perform the next ConcurrentGC if we have enough free memory.
+  if (concurrent_gc_ && GetFreeMemory() >= concurrent_min_free_) {
+    // Calculate the estimated GC duration.
+    double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
+    // Estimate how many remaining bytes we will have when we need to start the next GC.
+    size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
+    if (remaining_bytes < max_allowed_footprint_) {
+      // Start a concurrent GC when we get close to the estimated remaining bytes. When the
+      // allocation rate is very high, remaining_bytes could tell us that we should start a GC
+      // right away.
+      concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
+    } else {
+      // The estimated rate is so high that we should request another GC straight away.
+      concurrent_start_bytes_ = bytes_allocated;
+    }
+    DCHECK_LE(concurrent_start_bytes_, max_allowed_footprint_);
+    DCHECK_LE(max_allowed_footprint_, growth_limit_);
+  }
 }
 
 void Heap::ClearGrowthLimit() {
@@ -1630,6 +1648,7 @@
 void Heap::RequestConcurrentGC(Thread* self) {
   // Make sure that we can do a concurrent GC.
   Runtime* runtime = Runtime::Current();
+  DCHECK(concurrent_gc_);
   if (runtime == NULL || !runtime->IsFinishedStarting() ||
       !runtime->IsConcurrentGcEnabled()) {
     return;
@@ -1655,14 +1674,13 @@
 void Heap::ConcurrentGC(Thread* self) {
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (Runtime::Current()->IsShuttingDown() || !concurrent_gc_) {
+    if (Runtime::Current()->IsShuttingDown()) {
       return;
     }
   }
 
+  // Wait for any GCs currently running to finish.
   if (WaitForConcurrentGcToComplete(self) == kGcTypeNone) {
-    // Start a concurrent GC as one wasn't in progress
-    ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
     if (alloc_space_->Size() > min_alloc_space_size_for_sticky_gc_) {
       CollectGarbageInternal(kGcTypeSticky, kGcCauseBackground, false);
     } else {
diff --git a/src/heap.h b/src/heap.h
index ccfdcab..a302405 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -368,7 +368,6 @@
   GcType CollectGarbageInternal(GcType gc_plan, GcCause gc_cause, bool clear_soft_references)
       LOCKS_EXCLUDED(gc_complete_lock_,
                      Locks::heap_bitmap_lock_,
-                     Locks::mutator_lock_,
                      Locks::thread_suspend_count_lock_);
 
   void PreGcVerification(GarbageCollector* gc);
@@ -378,7 +377,7 @@
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
-  void GrowForUtilization();
+  void GrowForUtilization(uint64_t gc_duration);
 
   size_t GetPercentFree();
 
@@ -442,7 +441,7 @@
   size_t growth_limit_;
   size_t max_allowed_footprint_;
 
-  // Bytes until concurrent GC starts.
+  // Minimum bytes before concurrent GC starts.
   size_t concurrent_start_size_;
   size_t concurrent_min_free_;
   size_t concurrent_start_bytes_;
@@ -486,6 +485,15 @@
   // Last trim time
   uint64_t last_trim_time_;
 
+  // The time at which the last GC ended.
+  uint64_t last_gc_time_;
+
+  // How many bytes were allocated at the end of the last GC.
+  uint64_t last_gc_size_;
+
+  // Estimated allocation rate (bytes / second).
+  uint64_t allocation_rate_;
+
   UniquePtr<HeapBitmap> live_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
   UniquePtr<HeapBitmap> mark_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);