Reland "Trigger fewer GCs during startup"" am: 12dd6fc8d1
Original change: https://android-review.googlesource.com/c/platform/art/+/1811718
Change-Id: I0be9418d97b19f3a143964903e5fa28155132340
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 55fdb72..f7eea75 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -22,6 +22,9 @@
#include <malloc.h> // For mallinfo()
#endif
#include <memory>
+#include <random>
+#include <unistd.h>
+#include <sys/types.h>
#include <vector>
#include "android-base/stringprintf.h"
@@ -326,6 +329,7 @@
next_gc_type_(collector::kGcTypePartial),
capacity_(capacity),
growth_limit_(growth_limit),
+ initial_heap_size_(initial_size),
target_footprint_(initial_size),
// Using kPostMonitorLock as a lock at kDefaultMutexLevel is acquired after
// this one.
@@ -2142,6 +2146,27 @@
return HomogeneousSpaceCompactResult::kSuccess;
}
+void Heap::SetDefaultConcurrentStartBytes() {
+ MutexLock mu(Thread::Current(), *gc_complete_lock_);
+ if (collector_type_running_ != kCollectorTypeNone) {
+ // If a collector is already running, just let it set concurrent_start_bytes_.
+ return;
+ }
+ SetDefaultConcurrentStartBytesLocked();
+}
+
+void Heap::SetDefaultConcurrentStartBytesLocked() {
+ if (IsGcConcurrent()) {
+ size_t target_footprint = target_footprint_.load(std::memory_order_relaxed);
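+ // Leave roughly a quarter of the target footprint as headroom, clamped to
+ // [kMinConcurrentRemainingBytes, kMaxConcurrentRemainingBytes], so the concurrent GC
+ // requested at concurrent_start_bytes_ can finish before the footprint is exhausted.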
+ size_t reserve_bytes = target_footprint / 4;
+ reserve_bytes = std::min(reserve_bytes, kMaxConcurrentRemainingBytes);
+ reserve_bytes = std::max(reserve_bytes, kMinConcurrentRemainingBytes);
+ concurrent_start_bytes_ = UnsignedDifference(target_footprint, reserve_bytes);
+ } else {
+ concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
+ }
+}
+
void Heap::ChangeCollector(CollectorType collector_type) {
// TODO: Only do this with all mutators suspended to avoid races.
if (collector_type != collector_type_) {
@@ -2188,13 +2213,7 @@
UNREACHABLE();
}
}
- if (IsGcConcurrent()) {
- concurrent_start_bytes_ =
- UnsignedDifference(target_footprint_.load(std::memory_order_relaxed),
- kMinConcurrentRemainingBytes);
- } else {
- concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
- }
+ SetDefaultConcurrentStartBytesLocked();
}
}
@@ -3568,6 +3587,11 @@
void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
size_t bytes_allocated_before_gc) {
+ // We're running in the thread that set collector_type_running_ to something other than none,
+ // which guarantees that only one of us is running. Hence
+ // collector_type_running_ != kCollectorTypeNone, but that's a little tricky to turn into a
+ // DCHECK.
+
// We know what our utilization is at this moment.
// This doesn't actually resize any memory. It just lets the heap grow more when necessary.
const size_t bytes_allocated = GetBytesAllocated();
@@ -3693,8 +3717,7 @@
if (target_footprint_.load(std::memory_order_relaxed) == growth_limit_
&& growth_limit_ < capacity_) {
target_footprint_.store(capacity_, std::memory_order_relaxed);
- concurrent_start_bytes_ =
- UnsignedDifference(capacity_, kMinConcurrentRemainingBytes);
+ SetDefaultConcurrentStartBytes();
}
growth_limit_ = capacity_;
ScopedObjectAccess soa(Thread::Current());
@@ -4471,32 +4494,97 @@
<< PrettySize(new_footprint) << " for a " << PrettySize(alloc_size) << " allocation";
}
+// Run a GC if we haven't run one since initial_gc_num. This forces processes to
+// reclaim memory allocated during startup, even if they don't do much
+// allocation after startup. If the process is actively allocating and triggering
+// GCs, or has moved to the background and hence forced a GC, this does nothing.
class Heap::TriggerPostForkCCGcTask : public HeapTask {
public:
- explicit TriggerPostForkCCGcTask(uint64_t target_time) : HeapTask(target_time) {}
+ explicit TriggerPostForkCCGcTask(uint64_t target_time, uint32_t initial_gc_num) :
+ HeapTask(target_time), initial_gc_num_(initial_gc_num) {}
void Run(Thread* self) override {
gc::Heap* heap = Runtime::Current()->GetHeap();
- // Trigger a GC, if not already done. The first GC after fork, whenever it
- // takes place, will adjust the thresholds to normal levels.
- if (heap->target_footprint_.load(std::memory_order_relaxed) == heap->growth_limit_) {
- heap->RequestConcurrentGC(self, kGcCauseBackground, false, heap->GetCurrentGcNum());
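+ // An unchanged GC number means no GC has run since this task was scheduled; force one now.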
+ if (heap->GetCurrentGcNum() == initial_gc_num_) {
+ if (kLogAllGCs) {
+ LOG(INFO) << "Forcing GC for allocation-inactive process";
+ }
+ heap->RequestConcurrentGC(self, kGcCauseBackground, false, initial_gc_num_);
}
}
+ private:
+ uint32_t initial_gc_num_;
};
+// Reduce target footprint, if no GC has occurred since initial_gc_num.
+// If a GC already occurred, it will have done this for us.
+class Heap::ReduceTargetFootprintTask : public HeapTask {
+ public:
+ explicit ReduceTargetFootprintTask(uint64_t target_time, size_t new_target_sz,
+ uint32_t initial_gc_num) :
+ HeapTask(target_time), new_target_sz_(new_target_sz), initial_gc_num_(initial_gc_num) {}
+ void Run(Thread* self) override {
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ MutexLock mu(self, *(heap->gc_complete_lock_));
+ if (heap->GetCurrentGcNum() == initial_gc_num_
+ && heap->collector_type_running_ == kCollectorTypeNone) {
+ size_t target_footprint = heap->target_footprint_.load(std::memory_order_relaxed);
+ if (target_footprint > new_target_sz_) {
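+ // CAS in case target_footprint_ was updated concurrently; only lower
+ // concurrent_start_bytes_ if we actually installed the smaller target.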
+ if (heap->target_footprint_.CompareAndSetStrongRelaxed(target_footprint, new_target_sz_)) {
+ heap->SetDefaultConcurrentStartBytesLocked();
+ }
+ }
+ }
+ }
+ private:
+ size_t new_target_sz_;
+ uint32_t initial_gc_num_;
+};
+
+// Return a pseudo-random integer between 0 and 19999, using the uid as a seed. We want this to
+// be deterministic for a given process, but to vary randomly across processes. Empirically, the
+// uids for processes for which this matters are distinct.
+static uint32_t GetPseudoRandomFromUid() {
+ std::default_random_engine rng(getuid());
+ std::uniform_int_distribution<int> dist(0, 19999);
+ return dist(rng);
+}
+
void Heap::PostForkChildAction(Thread* self) {
+ uint32_t starting_gc_num = GetCurrentGcNum();
+ uint64_t last_adj_time = NanoTime();
+ next_gc_type_ = NonStickyGcType(); // Always start with a full gc.
+
// Temporarily increase target_footprint_ and concurrent_start_bytes_ to
// max values to avoid GC during app launch.
- if (collector_type_ == kCollectorTypeCC && !IsLowMemoryMode()) {
+ if (!IsLowMemoryMode()) {
// Set target_footprint_ to the largest allowed value.
SetIdealFootprint(growth_limit_);
- // Set concurrent_start_bytes_ to half of the heap size.
- size_t target_footprint = target_footprint_.load(std::memory_order_relaxed);
- concurrent_start_bytes_ = std::max(target_footprint / 2, GetBytesAllocated());
+ SetDefaultConcurrentStartBytes();
- GetTaskProcessor()->AddTask(
- self, new TriggerPostForkCCGcTask(NanoTime() + MsToNs(kPostForkMaxHeapDurationMS)));
+ // Shrink heap after kPostForkMaxHeapDurationMS, to force a memory hog process to GC.
+ // This remains high enough that many processes will continue without a GC.
+ if (initial_heap_size_ < growth_limit_) {
+ size_t first_shrink_size = std::max(growth_limit_ / 4, initial_heap_size_);
+ last_adj_time += MsToNs(kPostForkMaxHeapDurationMS);
+ GetTaskProcessor()->AddTask(
+ self, new ReduceTargetFootprintTask(last_adj_time, first_shrink_size, starting_gc_num));
+ // Shrink to a small value after a substantial time period. This will typically force a
+ // GC if none has occurred yet. Has no effect if there was a GC before this anyway, which
+ // is commonly the case, e.g. because of a process transition.
+ if (initial_heap_size_ < first_shrink_size) {
+ last_adj_time += MsToNs(4 * kPostForkMaxHeapDurationMS);
+ GetTaskProcessor()->AddTask(
+ self,
+ new ReduceTargetFootprintTask(last_adj_time, initial_heap_size_, starting_gc_num));
+ }
+ }
}
+ // Schedule a GC after a substantial period of time. This will become a no-op if another GC
+ // has run in the interim. If not, we want to avoid holding onto startup garbage.
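+ // The uid-seeded offset staggers these forced GCs across processes started around the same
+ // time.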
+ uint64_t post_fork_gc_time = last_adj_time
+ + MsToNs(4 * kPostForkMaxHeapDurationMS + GetPseudoRandomFromUid());
+ GetTaskProcessor()->AddTask(self,
+ new TriggerPostForkCCGcTask(post_fork_gc_time, starting_gc_num));
}
void Heap::VisitReflectiveTargets(ReflectiveValueVisitor *visit) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 27616fb..7c6dd1f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -336,9 +336,10 @@
void ChangeAllocator(AllocatorType allocator)
REQUIRES(Locks::mutator_lock_, !Locks::runtime_shutdown_lock_);
- // Change the collector to be one of the possible options (MS, CMS, SS).
+ // Change the collector to be one of the possible options (MS, CMS, SS). Only safe when no
+ // concurrent accesses to the heap are possible.
void ChangeCollector(CollectorType collector_type)
- REQUIRES(Locks::mutator_lock_);
+ REQUIRES(Locks::mutator_lock_, !*gc_complete_lock_);
// The given reference is believed to be to an object in the Java heap, check the soundness of it.
// TODO: NO_THREAD_SAFETY_ANALYSIS since we call this everywhere and it is impossible to find a
@@ -411,7 +412,7 @@
// Removes the growth limit on the alloc space so it may grow to its maximum capacity. Used to
// implement dalvik.system.VMRuntime.clearGrowthLimit.
- void ClearGrowthLimit();
+ void ClearGrowthLimit() REQUIRES(!*gc_complete_lock_);
// Make the current growth limit the new maximum capacity, unmaps pages at the end of spaces
// which will never be used. Used to implement dalvik.system.VMRuntime.clampGrowthLimit.
@@ -464,6 +465,7 @@
// For the alloc space, sets the maximum number of bytes that the heap is allowed to allocate
// from the system. Doesn't allow the space to exceed its growth limit.
+ // Set while we hold gc_complete_lock or collector_type_running_ != kCollectorTypeNone.
void SetIdealFootprint(size_t max_allowed_footprint);
// Blocks the caller until the garbage collector becomes idle and returns the type of GC we
@@ -960,7 +962,7 @@
const Verification* GetVerification() const;
- void PostForkChildAction(Thread* self);
+ void PostForkChildAction(Thread* self) REQUIRES(!*gc_complete_lock_);
void TraceHeapSize(size_t heap_size);
@@ -971,6 +973,7 @@
class CollectorTransitionTask;
class HeapTrimTask;
class TriggerPostForkCCGcTask;
+ class ReduceTargetFootprintTask;
// Compact source space to target space. Returns the collector used.
collector::GarbageCollector* Compact(space::ContinuousMemMapAllocSpace* target_space,
@@ -1177,6 +1180,9 @@
// the target utilization ratio. This should only be called immediately after a full garbage
// collection. bytes_allocated_before_gc is used to measure bytes / second for the period which
// the GC was run.
+ // This is only called by the thread that set collector_type_running_ to a value other than
+ // kCollectorTypeNone, or while holding gc_complete_lock, and ensuring that
+ // collector_type_running_ is kCollectorTypeNone.
void GrowForUtilization(collector::GarbageCollector* collector_ran,
size_t bytes_allocated_before_gc = 0)
REQUIRES(!process_state_update_lock_);
@@ -1269,6 +1275,11 @@
// of a garbage collection.
size_t GetNativeBytes();
+ // Set concurrent_start_bytes_ to a reasonable guess, given target_footprint_.
+ void SetDefaultConcurrentStartBytes() REQUIRES(!*gc_complete_lock_);
+ // This version assumes no concurrent updaters.
+ void SetDefaultConcurrentStartBytesLocked();
+
// All-known continuous spaces, where objects lie within fixed bounds.
std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
@@ -1385,6 +1396,9 @@
// Task processor, proxies heap trim requests to the daemon threads.
std::unique_ptr<TaskProcessor> task_processor_;
+ // The following are declared volatile only for debugging purposes; it shouldn't otherwise
+ // matter.
+
// Collector type of the running GC.
volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_);
@@ -1406,21 +1420,29 @@
// Only weakly enforced for simultaneous allocations.
size_t growth_limit_;
+ // Requested initial heap size. Temporarily ignored after a fork, but then reestablished after
+ // a while, usually triggering the initial GC.
+ size_t initial_heap_size_;
+
// Target size (as in maximum allocatable bytes) for the heap. Weakly enforced as a limit for
// non-concurrent GC. Used as a guideline for computing concurrent_start_bytes_ in the
- // concurrent GC case.
+ // concurrent GC case. Updates normally occur while collector_type_running_ is not none.
Atomic<size_t> target_footprint_;
+ Mutex process_state_update_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
// Computed with foreground-multiplier in GrowForUtilization() when run in
// jank non-perceptible state. On update to process state from background to
// foreground we set target_footprint_ to this value.
- Mutex process_state_update_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
size_t min_foreground_target_footprint_ GUARDED_BY(process_state_update_lock_);
// When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that
// it completes ahead of an allocation failing.
// A multiple of this is also used to determine when to trigger a GC in response to native
// allocation.
+ // After initialization, this is only updated by the thread that set collector_type_running_ to
+ // a value other than kCollectorTypeNone, or while holding gc_complete_lock, and ensuring that
+ // collector_type_running_ is kCollectorTypeNone.
size_t concurrent_start_bytes_;
// Since the heap was created, how many bytes have been freed.