Reland "Trigger fewer GCs during startup"" am: 12dd6fc8d1
Original change: https://android-review.googlesource.com/c/platform/art/+/1811718
Change-Id: I0be9418d97b19f3a143964903e5fa28155132340
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 55fdb72..f7eea75 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -22,6 +22,9 @@
#include <malloc.h> // For mallinfo()
#endif
#include <memory>
+#include <random>
+#include <unistd.h>
+#include <sys/types.h>
#include <vector>
#include "android-base/stringprintf.h"
@@ -326,6 +329,7 @@
next_gc_type_(collector::kGcTypePartial),
capacity_(capacity),
growth_limit_(growth_limit),
+ initial_heap_size_(initial_size),
target_footprint_(initial_size),
// Using kPostMonitorLock as a lock at kDefaultMutexLevel is acquired after
// this one.
@@ -2142,6 +2146,27 @@
return HomogeneousSpaceCompactResult::kSuccess;
}
+void Heap::SetDefaultConcurrentStartBytes() {
+ MutexLock mu(Thread::Current(), *gc_complete_lock_);
+ if (collector_type_running_ != kCollectorTypeNone) {
+ // If a collector is already running, just let it set concurrent_start_bytes_.
+ return;
+ }
+ SetDefaultConcurrentStartBytesLocked();
+}
+
+void Heap::SetDefaultConcurrentStartBytesLocked() {
+ if (IsGcConcurrent()) {
+ size_t target_footprint = target_footprint_.load(std::memory_order_relaxed);
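+ // Leave roughly a quarter of the target footprint as headroom, clamped to
+ // [kMinConcurrentRemainingBytes, kMaxConcurrentRemainingBytes], so the concurrent GC
+ // requested at concurrent_start_bytes_ can finish before the footprint is exhausted.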
+ size_t reserve_bytes = target_footprint / 4;
+ reserve_bytes = std::min(reserve_bytes, kMaxConcurrentRemainingBytes);
+ reserve_bytes = std::max(reserve_bytes, kMinConcurrentRemainingBytes);
+ concurrent_start_bytes_ = UnsignedDifference(target_footprint, reserve_bytes);
+ } else {
+ concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
+ }
+}
+
void Heap::ChangeCollector(CollectorType collector_type) {
// TODO: Only do this with all mutators suspended to avoid races.
if (collector_type != collector_type_) {
@@ -2188,13 +2213,7 @@
UNREACHABLE();
}
}
- if (IsGcConcurrent()) {
- concurrent_start_bytes_ =
- UnsignedDifference(target_footprint_.load(std::memory_order_relaxed),
- kMinConcurrentRemainingBytes);
- } else {
- concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
- }
+ SetDefaultConcurrentStartBytesLocked();
}
}
@@ -3568,6 +3587,11 @@
void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
size_t bytes_allocated_before_gc) {
+ // We're running in the thread that set collector_type_running_ to something other than none,
+ // which guarantees that only one of us is running. Hence
+ // collector_type_running_ != kCollectorTypeNone, but that's a little tricky to turn into a
+ // DCHECK.
+
// We know what our utilization is at this moment.
// This doesn't actually resize any memory. It just lets the heap grow more when necessary.
const size_t bytes_allocated = GetBytesAllocated();
@@ -3693,8 +3717,7 @@
if (target_footprint_.load(std::memory_order_relaxed) == growth_limit_
&& growth_limit_ < capacity_) {
target_footprint_.store(capacity_, std::memory_order_relaxed);
- concurrent_start_bytes_ =
- UnsignedDifference(capacity_, kMinConcurrentRemainingBytes);
+ SetDefaultConcurrentStartBytes();
}
growth_limit_ = capacity_;
ScopedObjectAccess soa(Thread::Current());
@@ -4471,32 +4494,97 @@
<< PrettySize(new_footprint) << " for a " << PrettySize(alloc_size) << " allocation";
}
+// Run a GC if we haven't run one since initial_gc_num. This forces processes to
+// reclaim memory allocated during startup, even if they don't do much
+// allocation after startup. If the process is actively allocating and triggering
+// GCs, or has moved to the background and hence forced a GC, this does nothing.
class Heap::TriggerPostForkCCGcTask : public HeapTask {
public:
- explicit TriggerPostForkCCGcTask(uint64_t target_time) : HeapTask(target_time) {}
+ explicit TriggerPostForkCCGcTask(uint64_t target_time, uint32_t initial_gc_num) :
+ HeapTask(target_time), initial_gc_num_(initial_gc_num) {}
void Run(Thread* self) override {
gc::Heap* heap = Runtime::Current()->GetHeap();
- // Trigger a GC, if not already done. The first GC after fork, whenever it
- // takes place, will adjust the thresholds to normal levels.
- if (heap->target_footprint_.load(std::memory_order_relaxed) == heap->growth_limit_) {
- heap->RequestConcurrentGC(self, kGcCauseBackground, false, heap->GetCurrentGcNum());
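+ // An unchanged GC number means no GC has run since this task was scheduled; force one now.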
+ if (heap->GetCurrentGcNum() == initial_gc_num_) {
+ if (kLogAllGCs) {
+ LOG(INFO) << "Forcing GC for allocation-inactive process";
+ }
+ heap->RequestConcurrentGC(self, kGcCauseBackground, false, initial_gc_num_);
}
}
+ private:
+ uint32_t initial_gc_num_;
};
+// Reduce target footprint, if no GC has occurred since initial_gc_num.
+// If a GC already occurred, it will have done this for us.
+class Heap::ReduceTargetFootprintTask : public HeapTask {
+ public:
+ explicit ReduceTargetFootprintTask(uint64_t target_time, size_t new_target_sz,
+ uint32_t initial_gc_num) :
+ HeapTask(target_time), new_target_sz_(new_target_sz), initial_gc_num_(initial_gc_num) {}
+ void Run(Thread* self) override {
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ MutexLock mu(self, *(heap->gc_complete_lock_));
+ if (heap->GetCurrentGcNum() == initial_gc_num_
+ && heap->collector_type_running_ == kCollectorTypeNone) {
+ size_t target_footprint = heap->target_footprint_.load(std::memory_order_relaxed);
+ if (target_footprint > new_target_sz_) {
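+ // CAS in case target_footprint_ was updated concurrently; only lower
+ // concurrent_start_bytes_ if we actually installed the smaller target.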
+ if (heap->target_footprint_.CompareAndSetStrongRelaxed(target_footprint, new_target_sz_)) {
+ heap->SetDefaultConcurrentStartBytesLocked();
+ }
+ }
+ }
+ }
+ private:
+ size_t new_target_sz_;
+ uint32_t initial_gc_num_;
+};
+
+// Return a pseudo-random integer between 0 and 19999, using the uid as a seed. We want this to
+// be deterministic for a given process, but to vary randomly across processes. Empirically, the
+// uids for processes for which this matters are distinct.
+static uint32_t GetPseudoRandomFromUid() {
+ std::default_random_engine rng(getuid());
+ std::uniform_int_distribution<int> dist(0, 19999);
+ return dist(rng);
+}
+
void Heap::PostForkChildAction(Thread* self) {
+ uint32_t starting_gc_num = GetCurrentGcNum();
+ uint64_t last_adj_time = NanoTime();
+ next_gc_type_ = NonStickyGcType(); // Always start with a full gc.
+
// Temporarily increase target_footprint_ and concurrent_start_bytes_ to
// max values to avoid GC during app launch.
- if (collector_type_ == kCollectorTypeCC && !IsLowMemoryMode()) {
+ if (!IsLowMemoryMode()) {
// Set target_footprint_ to the largest allowed value.
SetIdealFootprint(growth_limit_);
- // Set concurrent_start_bytes_ to half of the heap size.
- size_t target_footprint = target_footprint_.load(std::memory_order_relaxed);
- concurrent_start_bytes_ = std::max(target_footprint / 2, GetBytesAllocated());
+ SetDefaultConcurrentStartBytes();
- GetTaskProcessor()->AddTask(
- self, new TriggerPostForkCCGcTask(NanoTime() + MsToNs(kPostForkMaxHeapDurationMS)));
+ // Shrink heap after kPostForkMaxHeapDurationMS, to force a memory hog process to GC.
+ // This remains high enough that many processes will continue without a GC.
+ if (initial_heap_size_ < growth_limit_) {
+ size_t first_shrink_size = std::max(growth_limit_ / 4, initial_heap_size_);
+ last_adj_time += MsToNs(kPostForkMaxHeapDurationMS);
+ GetTaskProcessor()->AddTask(
+ self, new ReduceTargetFootprintTask(last_adj_time, first_shrink_size, starting_gc_num));
+ // Shrink to a small value after a substantial time period. This will typically force a
+ // GC if none has occurred yet. Has no effect if there was a GC before this anyway, which
+ // is commonly the case, e.g. because of a process transition.
+ if (initial_heap_size_ < first_shrink_size) {
+ last_adj_time += MsToNs(4 * kPostForkMaxHeapDurationMS);
+ GetTaskProcessor()->AddTask(
+ self,
+ new ReduceTargetFootprintTask(last_adj_time, initial_heap_size_, starting_gc_num));
+ }
+ }
}
+ // Schedule a GC after a substantial period of time. This will become a no-op if another GC
+ // has run in the interim. If not, we want to avoid holding onto startup garbage.
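+ // The uid-seeded offset staggers these forced GCs across processes started around the same
+ // time.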
+ uint64_t post_fork_gc_time = last_adj_time
+ + MsToNs(4 * kPostForkMaxHeapDurationMS + GetPseudoRandomFromUid());
+ GetTaskProcessor()->AddTask(self,
+ new TriggerPostForkCCGcTask(post_fork_gc_time, starting_gc_num));
}
void Heap::VisitReflectiveTargets(ReflectiveValueVisitor *visit) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 27616fb..7c6dd1f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -336,9 +336,10 @@
void ChangeAllocator(AllocatorType allocator)
REQUIRES(Locks::mutator_lock_, !Locks::runtime_shutdown_lock_);
- // Change the collector to be one of the possible options (MS, CMS, SS).
+ // Change the collector to be one of the possible options (MS, CMS, SS). Only safe when no
+ // concurrent accesses to the heap are possible.
void ChangeCollector(CollectorType collector_type)
- REQUIRES(Locks::mutator_lock_);
+ REQUIRES(Locks::mutator_lock_, !*gc_complete_lock_);
// The given reference is believed to be to an object in the Java heap, check the soundness of it.
// TODO: NO_THREAD_SAFETY_ANALYSIS since we call this everywhere and it is impossible to find a
@@ -411,7 +412,7 @@
// Removes the growth limit on the alloc space so it may grow to its maximum capacity. Used to
// implement dalvik.system.VMRuntime.clearGrowthLimit.
- void ClearGrowthLimit();
+ void ClearGrowthLimit() REQUIRES(!*gc_complete_lock_);
// Make the current growth limit the new maximum capacity, unmaps pages at the end of spaces
// which will never be used. Used to implement dalvik.system.VMRuntime.clampGrowthLimit.
@@ -464,6 +465,7 @@
// For the alloc space, sets the maximum number of bytes that the heap is allowed to allocate
// from the system. Doesn't allow the space to exceed its growth limit.
+ // Set while we hold gc_complete_lock or collector_type_running_ != kCollectorTypeNone.
void SetIdealFootprint(size_t max_allowed_footprint);
// Blocks the caller until the garbage collector becomes idle and returns the type of GC we
@@ -960,7 +962,7 @@
const Verification* GetVerification() const;
- void PostForkChildAction(Thread* self);
+ void PostForkChildAction(Thread* self) REQUIRES(!*gc_complete_lock_);
void TraceHeapSize(size_t heap_size);
@@ -971,6 +973,7 @@
class CollectorTransitionTask;
class HeapTrimTask;
class TriggerPostForkCCGcTask;
+ class ReduceTargetFootprintTask;
// Compact source space to target space. Returns the collector used.
collector::GarbageCollector* Compact(space::ContinuousMemMapAllocSpace* target_space,
@@ -1177,6 +1180,9 @@
// the target utilization ratio. This should only be called immediately after a full garbage
// collection. bytes_allocated_before_gc is used to measure bytes / second for the period which
// the GC was run.
+ // This is only called by the thread that set collector_type_running_ to a value other than
+ // kCollectorTypeNone, or while holding gc_complete_lock, and ensuring that
+ // collector_type_running_ is kCollectorTypeNone.
void GrowForUtilization(collector::GarbageCollector* collector_ran,
size_t bytes_allocated_before_gc = 0)
REQUIRES(!process_state_update_lock_);
@@ -1269,6 +1275,11 @@
// of a garbage collection.
size_t GetNativeBytes();
+ // Set concurrent_start_bytes_ to a reasonable guess, given target_footprint_.
+ void SetDefaultConcurrentStartBytes() REQUIRES(!*gc_complete_lock_);
+ // This version assumes no concurrent updaters.
+ void SetDefaultConcurrentStartBytesLocked();
+
// All-known continuous spaces, where objects lie within fixed bounds.
std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
@@ -1385,6 +1396,9 @@
// Task processor, proxies heap trim requests to the daemon threads.
std::unique_ptr<TaskProcessor> task_processor_;
+ // The following are declared volatile only for debugging purposes; it shouldn't otherwise
+ // matter.
+
// Collector type of the running GC.
volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_);
@@ -1406,21 +1420,29 @@
// Only weakly enforced for simultaneous allocations.
size_t growth_limit_;
+ // Requested initial heap size. Temporarily ignored after a fork, but then reestablished after
+ // a while, usually triggering the initial GC.
+ size_t initial_heap_size_;
+
// Target size (as in maximum allocatable bytes) for the heap. Weakly enforced as a limit for
// non-concurrent GC. Used as a guideline for computing concurrent_start_bytes_ in the
- // concurrent GC case.
+ // concurrent GC case. Updates normally occur while collector_type_running_ is not none.
Atomic<size_t> target_footprint_;
+ Mutex process_state_update_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
// Computed with foreground-multiplier in GrowForUtilization() when run in
// jank non-perceptible state. On update to process state from background to
// foreground we set target_footprint_ to this value.
- Mutex process_state_update_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
size_t min_foreground_target_footprint_ GUARDED_BY(process_state_update_lock_);
// When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that
// it completes ahead of an allocation failing.
// A multiple of this is also used to determine when to trigger a GC in response to native
// allocation.
+ // After initialization, this is only updated by the thread that set collector_type_running_ to
+ // a value other than kCollectorTypeNone, or while holding gc_complete_lock, and ensuring that
+ // collector_type_running_ is kCollectorTypeNone.
size_t concurrent_start_bytes_;
// Since the heap was created, how many bytes have been freed.