author Hans Boehm <hboehm@google.com> 2019-05-13 12:38:54 -0700
committer Hans Boehm <hboehm@google.com> 2019-05-15 04:40:29 +0000
commit a253c2d27bb95f120a27dc3fa8a66184a15a7442 (patch)
tree 0923928a83836ae8cc4e6538039c58517849910f
parent 092f7993336961434d6d3d30908c1ca4429e3d05 (diff)
Bytes_moved accounting fix and accounting cleanup
Bytes_moved should be incremented by the correct number of bytes needed for the space the object is allocated in, not the number of bytes it would take in region space.

Various minor cleanups for code that I found hard to read while attempting to track this down.

Remove a CHECK that held only because of the incorrect accounting. It now results in TreeHugger test failures.

Bug: 79921586
Test: Build and boot AOSP.
Change-Id: Iab75d271eb5b9812a127e708cf6b567d0c4c16f1
-rw-r--r-- runtime/gc/allocator/rosalloc.cc           |  2
-rw-r--r-- runtime/gc/collector/concurrent_copying.cc | 19
-rw-r--r-- runtime/gc/collector/concurrent_copying.h  | 16
-rw-r--r-- runtime/gc/heap.cc                         |  4
-rw-r--r-- runtime/gc/space/region_space.cc           |  1
-rw-r--r-- runtime/gc/space/region_space.h            |  7
-rw-r--r-- runtime/gc/space/space.h                   |  4
7 files changed, 33 insertions, 20 deletions
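In rough terms, the accounting change below charges the moved-bytes counters with the bytes the copy actually consumed in its destination space rather than its region-space size. A minimal sketch of that idea (illustrative only; MovedCounters, Record, and on_gc_thread are invented names, not ART code):

#include <atomic>
#include <cstddef>

// Illustrative sketch only: these counters mirror the split between
// bytes_moved_ (mutators) and bytes_moved_gc_thread_ (GC thread), but the
// surrounding structure is an assumption, not the real ConcurrentCopying code.
struct MovedCounters {
  std::atomic<size_t> bytes_moved_mutator{0};  // updated by mutator threads
  size_t bytes_moved_gc_thread = 0;            // updated only by the GC thread

  void Record(bool on_gc_thread, size_t bytes_allocated) {
    // The fix: charge the bytes actually allocated in the destination space
    // (region space or the non-moving fallback), not the size the object
    // would have occupied in region space.
    if (on_gc_thread) {
      bytes_moved_gc_thread += bytes_allocated;  // no atomic RMW needed
    } else {
      bytes_moved_mutator.fetch_add(bytes_allocated, std::memory_order_relaxed);
    }
  }
};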
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 87863656c6..f1572cd6ee 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1000,7 +1000,7 @@ void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size
// If true, read the page map entries in BulkFree() without using the
// lock for better performance, assuming that the existence of an
// allocated chunk/pointer being freed in BulkFree() guarantees that
-// the page map entry won't change. Disabled for now.
+// the page map entry won't change.
static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 30da1ae72d..81bc445cf5 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2393,11 +2393,9 @@ void ConcurrentCopying::Sweep(bool swap_bitmaps) {
CheckEmptyMarkStack();
TimingLogger::ScopedTiming split("Sweep", GetTimings());
for (const auto& space : GetHeap()->GetContinuousSpaces()) {
- if (space->IsContinuousMemMapAllocSpace()) {
+ if (space->IsContinuousMemMapAllocSpace() && space != region_space_
+ && !immune_spaces_.ContainsSpace(space)) {
space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
- if (space == region_space_ || immune_spaces_.ContainsSpace(space)) {
- continue;
- }
TimingLogger::ScopedTiming split2(
alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
RecordFree(alloc_space->Sweep(swap_bitmaps));
@@ -2643,7 +2641,8 @@ void ConcurrentCopying::ReclaimPhase() {
CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
}
CHECK_LE(to_objects, from_objects);
- CHECK_LE(to_bytes, from_bytes);
+ // to_bytes <= from_bytes is only approximately true, because objects expand a little when
+ // copying to non-moving space in near-OOM situations.
if (from_bytes > 0) {
copied_live_bytes_ratio_sum_ += static_cast<float>(to_bytes) / from_bytes;
gc_count_++;
@@ -2660,8 +2659,10 @@ void ConcurrentCopying::ReclaimPhase() {
CHECK_GE(cleared_bytes, from_bytes);
CHECK_GE(cleared_objects, from_objects);
}
- int64_t freed_bytes = cleared_bytes - to_bytes;
- int64_t freed_objects = cleared_objects - to_objects;
+ // freed_bytes could conceivably be negative if we fall back to nonmoving space and have to
+ // pad to a larger size.
+ int64_t freed_bytes = (int64_t)cleared_bytes - (int64_t)to_bytes;
+ uint64_t freed_objects = cleared_objects - to_objects;
if (kVerboseMode) {
LOG(INFO) << "RecordFree:"
<< " from_bytes=" << from_bytes << " from_objects=" << from_objects
@@ -3451,10 +3452,10 @@ mirror::Object* ConcurrentCopying::Copy(Thread* const self,
DCHECK(thread_running_gc_ != nullptr);
if (LIKELY(self == thread_running_gc_)) {
objects_moved_gc_thread_ += 1;
- bytes_moved_gc_thread_ += region_space_alloc_size;
+ bytes_moved_gc_thread_ += bytes_allocated;
} else {
objects_moved_.fetch_add(1, std::memory_order_relaxed);
- bytes_moved_.fetch_add(region_space_alloc_size, std::memory_order_relaxed);
+ bytes_moved_.fetch_add(bytes_allocated, std::memory_order_relaxed);
}
if (LIKELY(!fall_back_to_non_moving)) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 44ee7c2023..2e5752b91e 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -369,8 +369,8 @@ class ConcurrentCopying : public GarbageCollector {
// A cache of Heap::GetMarkBitmap().
accounting::HeapBitmap* heap_mark_bitmap_;
size_t live_stack_freeze_size_;
- size_t from_space_num_objects_at_first_pause_;
- size_t from_space_num_bytes_at_first_pause_;
+ size_t from_space_num_objects_at_first_pause_; // Computed if kEnableFromSpaceAccountingCheck
+ size_t from_space_num_bytes_at_first_pause_; // Computed if kEnableFromSpaceAccountingCheck
Atomic<int> is_mark_stack_push_disallowed_;
enum MarkStackMode {
kMarkStackModeOff = 0, // Mark stack is off.
@@ -384,9 +384,9 @@ class ConcurrentCopying : public GarbageCollector {
Atomic<MarkStackMode> mark_stack_mode_;
bool weak_ref_access_enabled_ GUARDED_BY(Locks::thread_list_lock_);
- // How many objects and bytes we moved. Used for accounting.
- // GC thread moves many more objects than mutators.
- // Therefore, we separate the two to avoid CAS.
+ // How many objects and bytes we moved. The GC thread moves many more objects
+ // than mutators. Therefore, we separate the two to avoid CAS. Bytes_moved_ and
+ // bytes_moved_gc_thread_ are critical for GC triggering; the others are just informative.
Atomic<size_t> bytes_moved_; // Used by mutators
Atomic<size_t> objects_moved_; // Used by mutators
size_t bytes_moved_gc_thread_; // Used by GC
@@ -417,7 +417,11 @@ class ConcurrentCopying : public GarbageCollector {
// The skipped blocks are memory blocks/chunks that were copies of
// objects that were unused due to lost races (cas failures) at
- // object copy/forward pointer install. They are reused.
+ // object copy/forward pointer install. They may be reused.
+ // Skipped blocks are always in region space. Their size is included directly
+ // in num_bytes_allocated_, i.e. they are treated as allocated, but may be directly
+ // used without going through a GC cycle like other objects. They are reused only
+ // if we run out of region space. TODO: Revisit this design.
Mutex skipped_blocks_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
std::multimap<size_t, uint8_t*> skipped_blocks_map_ GUARDED_BY(skipped_blocks_lock_);
Atomic<size_t> to_space_bytes_skipped_;
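The skipped_blocks_map_ above keys block addresses by their size, so reuse is roughly a lower_bound probe on that multimap. A hedged sketch of the access pattern (TakeSkippedBlock is an invented helper; ART's real reuse policy, locking, and remainder handling differ):

#include <cstddef>
#include <cstdint>
#include <map>

// Invented helper: take a previously skipped block of at least `needed`
// bytes out of a size-keyed multimap, or return nullptr if none is large
// enough. Only the multimap access pattern is the point here.
uint8_t* TakeSkippedBlock(std::multimap<size_t, uint8_t*>& skipped_blocks,
                          size_t needed) {
  auto it = skipped_blocks.lower_bound(needed);  // smallest block >= needed
  if (it == skipped_blocks.end()) {
    return nullptr;
  }
  uint8_t* block = it->second;
  skipped_blocks.erase(it);
  return block;
}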
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 532b3ef000..59d79530f1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1613,8 +1613,8 @@ void Heap::VerifyHeap() {
void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
// Use signed comparison since freed bytes can be negative when background compaction foreground
- // transitions occurs. This is caused by the moving objects from a bump pointer space to a
- // free list backed space typically increasing memory footprint due to padding and binning.
+ // transitions occur. This is typically due to objects moving from a bump pointer space to a
+ // free list backed space, which may increase memory footprint due to padding and binning.
RACING_DCHECK_LE(freed_bytes,
static_cast<int64_t>(num_bytes_allocated_.load(std::memory_order_relaxed)));
// Note: This relies on 2s complement for handling negative freed_bytes.
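The "2s complement" note refers to subtracting a possibly negative freed_bytes from an unsigned byte counter; a self-contained sketch of why that works (the counter here is illustrative, not the actual num_bytes_allocated_ wiring):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
  std::atomic<size_t> num_bytes_allocated{1000};
  int64_t freed_bytes = -8;  // e.g. compaction into a free-list space grew the footprint

  // Casting -8 to size_t wraps it modulo the width of size_t, and unsigned
  // arithmetic is modular too, so fetch_sub of that huge value behaves
  // exactly like adding 8.
  num_bytes_allocated.fetch_sub(static_cast<size_t>(freed_bytes),
                                std::memory_order_relaxed);

  std::cout << num_bytes_allocated.load() << "\n";  // prints 1008
  return 0;
}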
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 5179702916..a8b0d55340 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -466,6 +466,7 @@ void RegionSpace::ClearFromSpace(/* out */ uint64_t* cleared_bytes,
CheckLiveBytesAgainstRegionBitmap(r);
}
if (r->IsInFromSpace()) {
+ DCHECK(!r->IsTlab());
*cleared_bytes += r->BytesAllocated();
*cleared_objects += r->ObjectsAllocated();
--num_non_free_regions_;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 0bbc76a581..26af6331cc 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -360,7 +360,9 @@ class RegionSpace final : public ContinuousMemMapAllocSpace {
}
}
+ // Increment object allocation count for region containing ref.
void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
+
bool AllocNewTlab(Thread* self, size_t min_bytes) REQUIRES(!region_lock_);
uint32_t Time() {
@@ -482,6 +484,10 @@ class RegionSpace final : public ContinuousMemMapAllocSpace {
return is_newly_allocated_;
}
+ bool IsTlab() const {
+ return is_a_tlab_;
+ }
+
bool IsInFromSpace() const {
return type_ == RegionType::kRegionTypeFromSpace;
}
@@ -552,6 +558,7 @@ class RegionSpace final : public ContinuousMemMapAllocSpace {
return live_bytes_;
}
+ // Returns the number of allocated bytes. "Bulk allocated" bytes in active TLABs are excluded.
size_t BytesAllocated() const;
size_t ObjectsAllocated() const;
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 05ff55b9f1..6a4095c09f 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -204,7 +204,7 @@ class AllocSpace {
// Alloc can be called from multiple threads at the same time and must be thread-safe.
//
// bytes_tl_bulk_allocated - bytes allocated in bulk ahead of time for a thread local allocation,
- // if applicable. It can be
+ // if applicable. It is
// 1) equal to bytes_allocated if it's not a thread local allocation,
// 2) greater than bytes_allocated if it's a thread local
// allocation that required a new buffer, or
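A small sketch of the bytes_tl_bulk_allocated cases listed above (kTlabSize, AllocResult, and Classify are invented names, not ART's Alloc contract; the final branch covers the case the hunk cuts off, an allocation served from an existing thread-local buffer):

#include <cstddef>

constexpr size_t kTlabSize = 64 * 1024;  // assumed thread-local buffer size

struct AllocResult {
  size_t bytes_allocated;          // bytes charged to this one object
  size_t bytes_tl_bulk_allocated;  // bytes newly carved out of the space
};

// Invented helper illustrating the cases enumerated above.
AllocResult Classify(bool thread_local_alloc, bool needed_new_buffer, size_t size) {
  if (!thread_local_alloc) {
    return {size, size};       // case 1: equal to bytes_allocated
  }
  if (needed_new_buffer) {
    return {size, kTlabSize};  // case 2: greater, a whole new buffer was reserved
  }
  return {size, 0};            // remaining case: zero, served from an existing buffer
}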
@@ -228,7 +228,7 @@ class AllocSpace {
// Returns how many bytes were freed.
virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
- // Returns how many bytes were freed.
+ // Free (deallocate) all objects in a list, and return the number of bytes freed.
virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
// Revoke any sort of thread-local buffers that are used to speed up allocations for the given