author Hans Boehm <hboehm@google.com> 2023-06-27 16:30:53 -0700
committer Hans Boehm <hboehm@google.com> 2023-07-12 15:56:58 +0000
commit b95fee39e62ff5a24493c256c6bed87a2f85aa4a (patch)
tree 1c57faf86a0e017c1e63d129719f5945a78af746
parent 35f9047ca7b618556c1b01a1034019433a026869 (diff)
Improve OOME handling and detection
Do not trigger an OOME just because we did not see much free memory after running a GC; someone else may have used it up in the interim.

Attempt to handle OOME after the GC has shut down in a somewhat better way. This is still not a great solution, but since real Android apps don't shut down "normally", this is a significant improvement for a test-only problem.

Simplify the homogeneous space compaction code. We now also insist that after homogeneous space compaction, a certain fraction of the heap be empty. (Modern GC configurations don't use this code anyway.)

Add runFinalization() call to 2042-reference-processing. Without that, and with changes to the OOME code that should have been irrelevant, I saw it running out of memory, presumably because reference processing was falling behind. Once the main thread is close to running out of memory, it may not get around to processing references anymore, making things worse.

Bug: 288982675
Bug: 288446884
Test: TreeHugger
Change-Id: I05fbdc13194f64b7ce959db0286d134a980f9fa9
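The following is a minimal, self-contained sketch (not the actual ART code) of the OOME criterion this change introduces: after an allocation-failure GC, an OOME is considered only when BOTH the fraction of heap currently free and the fraction just freed by the GC fall below their thresholds. All parameter names here (growth_limit, bytes_allocated, bytes_freed_before, bytes_freed_ever) are illustrative stand-ins for the heap state used in heap.cc; in the patch this same predicate also gates the do/while retry loop via min_freed_to_continue.

#include <cstdint>

// Thresholds mirroring the constants added by this change.
constexpr double kMinFreedHeapAfterGcForAlloc = 0.05;  // fraction freed by the last GC
constexpr double kMinFreeHeapAfterGcForAlloc = 0.01;   // fraction of the heap currently free

bool HaveReclaimedEnough(uint64_t growth_limit,
                         uint64_t bytes_allocated,
                         uint64_t bytes_freed_before,
                         uint64_t bytes_freed_ever) {
  // Unsigned difference: concurrent allocation can push bytes_allocated past
  // growth_limit, so clamp at zero instead of wrapping.
  uint64_t free_heap =
      growth_limit > bytes_allocated ? growth_limit - bytes_allocated : 0;
  uint64_t newly_freed = bytes_freed_ever - bytes_freed_before;
  double free_heap_ratio = static_cast<double>(free_heap) / growth_limit;
  double newly_freed_ratio = static_cast<double>(newly_freed) / growth_limit;
  // Either condition alone is enough to keep going; OOME is reached only when
  // both fail, so a GC whose reclaimed memory was immediately consumed by
  // other threads no longer triggers a spurious OOME.
  return free_heap_ratio >= kMinFreeHeapAfterGcForAlloc ||
         newly_freed_ratio >= kMinFreedHeapAfterGcForAlloc;
}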
-rw-r--r--  runtime/gc/heap.cc  122
-rw-r--r--  test/2042-reference-processing/src/Main.java  4
2 files changed, 74 insertions, 52 deletions
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 478c14c81b..3c0b573713 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -176,8 +176,13 @@ static constexpr size_t kVerifyObjectAllocationStackSize = 16 * KB /
static constexpr size_t kDefaultAllocationStackSize = 8 * MB /
sizeof(mirror::HeapReference<mirror::Object>);
+// If we violate BOTH of the following constraints, we throw OOME.
+// They differ due to concurrent allocation.
// After a GC (due to allocation failure) we should retrieve at least this
-// fraction of the current max heap size. Otherwise throw OOME.
+// fraction of the current max heap size.
+static constexpr double kMinFreedHeapAfterGcForAlloc = 0.05;
+// After a GC (due to allocation failure), at least this fraction of the
+// heap should be available.
static constexpr double kMinFreeHeapAfterGcForAlloc = 0.01;
// For deterministic compilation, we need the heap to be at a well-known address.
@@ -1929,12 +1934,29 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self,
return ptr;
}
}
+ if (IsGCDisabledForShutdown()) {
+ // We're just shutting down and GCs don't work anymore. Try a different allocator.
+ mirror::Object* ptr = TryToAllocate<true, false>(self,
+ kAllocatorTypeNonMoving,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ if (ptr != nullptr) {
+ return ptr;
+ }
+ }
+ uint64_t bytes_freed_before;
auto have_reclaimed_enough = [&]() {
size_t curr_bytes_allocated = GetBytesAllocated();
- double curr_free_heap =
- static_cast<double>(growth_limit_ - curr_bytes_allocated) / growth_limit_;
- return curr_free_heap >= kMinFreeHeapAfterGcForAlloc;
+ size_t free_heap = UnsignedDifference(growth_limit_, curr_bytes_allocated);
+ uint64_t newly_freed = GetBytesFreedEver() - bytes_freed_before;
+ DCHECK(newly_freed < (static_cast<uint64_t>(1) << 60)); // Didn't wrap.
+ double free_heap_ratio = static_cast<double>(free_heap) / growth_limit_;
+ double newly_freed_ratio = static_cast<double>(newly_freed) / growth_limit_;
+ return free_heap_ratio >= kMinFreeHeapAfterGcForAlloc ||
+ newly_freed_ratio >= kMinFreedHeapAfterGcForAlloc;
};
// We perform one GC as per the next_gc_type_ (chosen in GrowForUtilization),
// if it's not already tried. If that doesn't succeed then go for the most
@@ -1971,58 +1993,42 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self,
// We don't need a WaitForGcToComplete here either.
// TODO: Should check whether another thread already just ran a GC with soft
// references.
+
DCHECK(!gc_plan_.empty());
- pre_oome_gc_count_.fetch_add(1, std::memory_order_relaxed);
- PERFORM_SUSPENDING_OPERATION(
- CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true, GC_NUM_ANY));
- if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
- (!instrumented && EntrypointsInstrumented())) {
- return nullptr;
- }
- mirror::Object* ptr = nullptr;
- if (have_reclaimed_enough()) {
- ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
- usable_size, bytes_tl_bulk_allocated);
- }
- if (ptr == nullptr) {
- const uint64_t current_time = NanoTime();
- switch (allocator) {
- case kAllocatorTypeRosAlloc:
- // Fall-through.
- case kAllocatorTypeDlMalloc: {
+ uint64_t min_freed_to_continue =
+ static_cast<uint64_t>(kMinFreedHeapAfterGcForAlloc * growth_limit_ + alloc_size);
+ // Repeatedly collect the entire heap until either
+ // (a) this was insufficiently productive at reclaiming memory and we should give up to avoid
+ // "GC thrashing", or
+ // (b) GC was sufficiently productive (reclaimed min_freed_to_continue bytes) AND allowed us to
+ // satisfy the allocation request.
+ do {
+ bytes_freed_before = GetBytesFreedEver();
+ pre_oome_gc_count_.fetch_add(1, std::memory_order_relaxed);
+ PERFORM_SUSPENDING_OPERATION(
+ CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true, GC_NUM_ANY));
+ if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+ (!instrumented && EntrypointsInstrumented())) {
+ return nullptr;
+ }
+ bool ran_homogeneous_space_compaction = false;
+ bool immediately_reclaimed_enough = have_reclaimed_enough();
+ if (!immediately_reclaimed_enough) {
+ const uint64_t current_time = NanoTime();
+ if (allocator == kAllocatorTypeRosAlloc || allocator == kAllocatorTypeDlMalloc) {
if (use_homogeneous_space_compaction_for_oom_ &&
current_time - last_time_homogeneous_space_compaction_by_oom_ >
min_interval_homogeneous_space_compaction_by_oom_) {
last_time_homogeneous_space_compaction_by_oom_ = current_time;
- HomogeneousSpaceCompactResult result =
- PERFORM_SUSPENDING_OPERATION(PerformHomogeneousSpaceCompact());
+ ran_homogeneous_space_compaction =
+ (PERFORM_SUSPENDING_OPERATION(PerformHomogeneousSpaceCompact()) ==
+ HomogeneousSpaceCompactResult::kSuccess);
// Thread suspension could have occurred.
if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
(!instrumented && EntrypointsInstrumented())) {
return nullptr;
}
- switch (result) {
- case HomogeneousSpaceCompactResult::kSuccess:
- // If the allocation succeeded, we delayed an oom.
- ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
- usable_size, bytes_tl_bulk_allocated);
- if (ptr != nullptr) {
- count_delayed_oom_++;
- }
- break;
- case HomogeneousSpaceCompactResult::kErrorReject:
- // Reject due to disabled moving GC.
- break;
- case HomogeneousSpaceCompactResult::kErrorVMShuttingDown:
- // Throw OOM by default.
- break;
- default: {
- UNIMPLEMENTED(FATAL) << "homogeneous space compaction result: "
- << static_cast<size_t>(result);
- UNREACHABLE();
- }
- }
// Always print that we ran homogeneous space compaction since this can cause jank.
VLOG(heap) << "Ran heap homogeneous space compaction, "
<< " requested defragmentation "
@@ -2034,20 +2040,32 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self,
<< " delayed count = "
<< count_delayed_oom_.load();
}
- break;
}
- default: {
- // Do nothing for others allocators.
+ }
+ if (immediately_reclaimed_enough ||
+ (ran_homogeneous_space_compaction && have_reclaimed_enough())) {
+ mirror::Object* ptr = TryToAllocate<true, true>(
+ self, allocator, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
+ if (ptr != nullptr) {
+ if (ran_homogeneous_space_compaction) {
+ count_delayed_oom_++;
+ }
+ return ptr;
}
}
- }
+ // This loops only if we reclaimed plenty of memory, but presumably some other thread beat us
+ // to allocating it. In the very unlikely case that we're running into a serious fragmentation
+ // issue, and there is no other thread allocating, GCs will quickly become unsuccessful, and we
+ // will stop then. If another thread is allocating aggressively, this may go on for a while,
+ // but we are still making progress somewhere.
+ } while (GetBytesFreedEver() - bytes_freed_before > min_freed_to_continue);
#undef PERFORM_SUSPENDING_OPERATION
- // If the allocation hasn't succeeded by this point, throw an OOM error.
- if (ptr == nullptr) {
+ // Throw an OOM error.
+ {
ScopedAllowThreadSuspension ats;
ThrowOutOfMemoryError(self, alloc_size, allocator);
}
- return ptr;
+ return nullptr;
}
void Heap::SetTargetHeapUtilization(float target) {
diff --git a/test/2042-reference-processing/src/Main.java b/test/2042-reference-processing/src/Main.java
index c7f547fe15..1e289f5266 100644
--- a/test/2042-reference-processing/src/Main.java
+++ b/test/2042-reference-processing/src/Main.java
@@ -238,6 +238,10 @@ public class Main {
System.out.println("Unexpected live object count");
}
dropObjects(DROP_OBJS);
+ if (i % 100 == 0) {
+ // Make sure we don't fall too far behind, otherwise we may run out of memory.
+ System.runFinalization();
+ }
emptyAndCheckQueues();
}
dropObjects(MIN_LIVE_OBJS);