Defer heap transitions to heap trimming daemon.
This fixes the case where rapid back-and-forth process state changes
would cause a large number of heap transitions.
Change-Id: Ia5be792edcf26079b2aa23b9c115f6b0a9a39a1c
diff --git a/runtime/gc/collector/ b/runtime/gc/collector/
index 4aff68a..71424bd 100644
--- a/runtime/gc/collector/
+++ b/runtime/gc/collector/
@@ -1347,9 +1347,6 @@
- timings_.NewSplit("RequestHeapTrim");
- heap->RequestHeapTrim();
// Update the cumulative statistics
total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
diff --git a/runtime/gc/ b/runtime/gc/
index 1fcbe4d..2497e6a 100644
--- a/runtime/gc/
+++ b/runtime/gc/
@@ -90,6 +90,11 @@
+ desired_collector_type_(collector_type_),
+ heap_trim_request_lock_(nullptr),
+ heap_trim_target_time_(0),
+ heap_transition_target_time_(0),
+ heap_trim_request_pending_(false),
@@ -127,7 +132,6 @@
- last_trim_time_ms_(0),
/* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
* causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
@@ -160,16 +164,17 @@
// If we aren't the zygote, switch to the default non zygote allocator. This may update the
// entrypoints.
if (!Runtime::Current()->IsZygote()) {
- ChangeCollector(post_zygote_collector_type_);
+ desired_collector_type_ = post_zygote_collector_type_;
large_object_threshold_ = kDefaultLargeObjectThreshold;
} else {
if (kMovingCollector) {
// We are the zygote, use bump pointer allocation + semi space collector.
- ChangeCollector(kCollectorTypeSS);
+ desired_collector_type_ = kCollectorTypeSS;
} else {
- ChangeCollector(post_zygote_collector_type_);
+ desired_collector_type_ = post_zygote_collector_type_;
+ ChangeCollector(desired_collector_type_);
live_bitmap_.reset(new accounting::HeapBitmap(this));
mark_bitmap_.reset(new accounting::HeapBitmap(this));
@@ -274,7 +279,7 @@
gc_complete_lock_ = new Mutex("GC complete lock");
gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
- last_gc_time_ns_ = NanoTime();
+ heap_trim_request_lock_ = new Mutex("Heap trim request lock");
last_gc_size_ = GetBytesAllocated();
if (ignore_max_footprint_) {
@@ -452,12 +457,12 @@
if (process_state_ != process_state) {
process_state_ = process_state;
if (process_state_ == kProcessStateJankPerceptible) {
- TransitionCollector(post_zygote_collector_type_);
+ // Transition back to foreground right away to prevent jank.
+ RequestHeapTransition(post_zygote_collector_type_, 0);
} else {
- TransitionCollector(background_collector_type_);
+ // Don't delay for debug builds since we may want to stress test the GC.
+ RequestHeapTransition(background_collector_type_, kIsDebugBuild ? 0 : kHeapTransitionWait);
- } else {
- CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
@@ -854,9 +859,40 @@
+void Heap::DoPendingTransitionOrTrim() {
+ Thread* self = Thread::Current();
+ CollectorType desired_collector_type;
+ // Wait until we reach the desired transition time.
+ while (true) {
+ uint64_t wait_time;
+ {
+ MutexLock mu(self, *heap_trim_request_lock_);
+ desired_collector_type = desired_collector_type_;
+ uint64_t current_time = NanoTime();
+ if (current_time >= heap_transition_target_time_) {
+ break;
+ }
+ wait_time = heap_transition_target_time_ - current_time;
+ }
+ ScopedThreadStateChange tsc(self, kSleeping);
+ usleep(wait_time / 1000); // Usleep takes microseconds.
+ }
+ // Transition the heap if the desired collector type is not the same as the current collector type.
+ TransitionCollector(desired_collector_type);
+ // Do a heap trim if it is needed.
+ Trim();
void Heap::Trim() {
Thread* self = Thread::Current();
+ MutexLock mu(self, *heap_trim_request_lock_);
+ if (!heap_trim_request_pending_ || NanoTime() < heap_trim_target_time_) {
+ return;
+ }
+ heap_trim_request_pending_ = false;
+ }
+ {
// Need to do this before acquiring the locks since we don't want to get suspended while
// holding any locks.
ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
@@ -1741,6 +1777,7 @@
collector->Run(gc_cause, clear_soft_references);
total_objects_freed_ever_ += collector->GetFreedObjects();
total_bytes_freed_ever_ += collector->GetFreedBytes();
+ RequestHeapTrim(Heap::kHeapTrimWait);
// Enqueue cleared references.
// Grow the heap so that we know when to perform the next GC.
@@ -2503,7 +2540,20 @@
-void Heap::RequestHeapTrim() {
+void Heap::RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time) {
+ Thread* self = Thread::Current();
+ {
+ MutexLock mu(self, *heap_trim_request_lock_);
+ if (desired_collector_type_ == desired_collector_type) {
+ return;
+ }
+ heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+ desired_collector_type_ = desired_collector_type;
+ }
+ SignalHeapTrimDaemon(self);
+void Heap::RequestHeapTrim(uint64_t delta_time) {
// GC completed and now we must decide whether to request a heap trim (advising pages back to the
// kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
// a space it will hold its lock and can become a cause of jank.
@@ -2516,11 +2566,6 @@
// to utilization (which is probably inversely proportional to how much benefit we can expect).
// We could try mincore(2) but that's only a measure of how many pages we haven't given away,
// not how much use we're making of those pages.
- uint64_t ms_time = MilliTime();
- // Don't bother trimming the alloc space if a heap trim occurred in the last two seconds.
- if (ms_time - last_trim_time_ms_ < 2 * 1000) {
- return;
- }
Thread* self = Thread::Current();
Runtime* runtime = Runtime::Current();
@@ -2531,19 +2576,27 @@
- last_trim_time_ms_ = ms_time;
- // Trim only if we do not currently care about pause times.
+ // Request a heap trim only if we do not currently care about pause times.
if (!CareAboutPauseTimes()) {
- JNIEnv* env = self->GetJniEnv();
- DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
- DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != NULL);
- env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
- WellKnownClasses::java_lang_Daemons_requestHeapTrim);
- CHECK(!env->ExceptionCheck());
+ {
+ MutexLock mu(self, *heap_trim_request_lock_);
+ heap_trim_target_time_ = std::max(heap_trim_target_time_, NanoTime() + delta_time);
+ heap_trim_request_pending_ = true;
+ }
+ // Notify the daemon thread which will actually do the heap trim.
+ SignalHeapTrimDaemon(self);
+void Heap::SignalHeapTrimDaemon(Thread* self) {
+ JNIEnv* env = self->GetJniEnv();
+ DCHECK(WellKnownClasses::java_lang_Daemons != nullptr);
+ DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr);
+ env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
+ WellKnownClasses::java_lang_Daemons_requestHeapTrim);
+ CHECK(!env->ExceptionCheck());
void Heap::RevokeThreadLocalBuffers(Thread* thread) {
if (rosalloc_space_ != nullptr) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a90af27..fd905a6 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -135,6 +135,10 @@
// Used so that we don't overflow the allocation time atomic integer.
static constexpr size_t kTimeAdjust = 1024;
+ // How long we wait after a GC to perform a heap trim (nanoseconds).
+ static constexpr uint64_t kHeapTrimWait = MsToNs(5000);
+ static constexpr uint64_t kHeapTransitionWait = MsToNs(5000);
// Create a heap with the requested sizes. The possible empty
// image_file_names names specify Spaces to load based on
// ImageWriter output.
@@ -437,8 +441,12 @@
void DumpForSigQuit(std::ostream& os);
+ // Do a pending heap transition or trim.
+ void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_);
// Trim the managed and native heaps by releasing unused memory back to the OS.
- void Trim();
+ void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_);
void RevokeThreadLocalBuffers(Thread* thread);
void RevokeAllThreadLocalBuffers();
@@ -639,7 +647,9 @@
collector::GcType WaitForGcToCompleteLocked(Thread* self)
- void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
+ void RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time)
+ LOCKS_EXCLUDED(heap_trim_request_lock_);
+ void RequestHeapTrim(uint64_t delta_time) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
bool IsGCRequestPending() const;
@@ -681,6 +691,10 @@
// Clear cards and update the mod union table.
void ProcessCards(TimingLogger& timings);
+ // Signal the heap trim daemon that there is something to do, either a heap transition or heap
+ // trim.
+ void SignalHeapTrimDaemon(Thread* self);
// Push an object onto the allocation stack.
void PushOnAllocationStack(Thread* self, mirror::Object* obj);
@@ -733,6 +747,17 @@
CollectorType post_zygote_collector_type_;
// Which collector we will use when the app is notified of a transition to background.
CollectorType background_collector_type_;
+ // Desired collector type; the heap trimming daemon transitions the heap if it is != collector_type_.
+ CollectorType desired_collector_type_;
+ // Lock which guards heap trim requests.
+ Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ // When we want to perform the next heap trim (nanoseconds).
+ uint64_t heap_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
+ // When we want to perform the next heap transition (nanoseconds).
+ uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+ // If we have a heap trim request pending.
+ bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);
// How many GC threads we may use for paused parts of garbage collection.
const size_t parallel_gc_threads_;
@@ -854,9 +879,6 @@
// Parallel GC data structures.
UniquePtr<ThreadPool> thread_pool_;
- // The last time a heap trim occurred.
- uint64_t last_trim_time_ms_;
// The nanosecond time at which the last GC ended.
uint64_t last_gc_time_ns_;
diff --git a/runtime/native/ b/runtime/native/
index 8716556..4aa1d10 100644
--- a/runtime/native/
+++ b/runtime/native/
@@ -206,12 +206,11 @@
static void VMRuntime_trimHeap(JNIEnv*, jobject) {
- Runtime::Current()->GetHeap()->Trim();
+ Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim();
static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
- Thread* self = ThreadForEnv(env);
- Runtime::Current()->GetHeap()->ConcurrentGC(self);
+ Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env));
typedef std::map<std::string, mirror::String*> StringTable;