Print peak regions allocated metric in GC perf dump

The maximum (across all GC iterations) of peak regions allocated
is printed during the GC performance dump, which reflects space
pressure on the allocator.
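
For illustration (hypothetical numbers, assuming a 256 KiB region
size), the new dump line looks like:

  Peak regions allocated 178 (44MB) / 1024 (256MB)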

Bug: b/69633530
Test: Any bench with -XX:DumpGCPerformanceOnShutdown cmdline flag
Change-Id: I0e2960825c5637955e9dfb677e5ed65ed852ba93
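
A sketch of a test invocation (the benchmark jar path and main class
below are hypothetical):

  dalvikvm -XX:DumpGCPerformanceOnShutdown -cp /data/local/tmp/bench.jar Bench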
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 70685bc..7b083df 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -88,6 +88,7 @@
       from_space_num_bytes_at_first_pause_(0),
       mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
+      max_peak_num_non_free_regions_(0),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
       mark_from_read_barrier_measurements_(false),
@@ -1754,6 +1755,8 @@
     cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
     uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
     cumulative_objects_moved_.FetchAndAddRelaxed(to_objects);
+    max_peak_num_non_free_regions_ = std::max(max_peak_num_non_free_regions_,
+                                              region_space_->GetNumNonFreeRegions());
     if (kEnableFromSpaceAccountingCheck) {
       CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
@@ -2691,6 +2694,13 @@
   }
   os << "Cumulative bytes moved " << cumulative_bytes_moved_.LoadRelaxed() << "\n";
   os << "Cumulative objects moved " << cumulative_objects_moved_.LoadRelaxed() << "\n";
+
+  os << "Peak regions allocated "
+     << max_peak_num_non_free_regions_ << " ("
+     << PrettySize(max_peak_num_non_free_regions_ * space::RegionSpace::kRegionSize)
+     << ") / " << region_space_->GetNumRegions() << " ("
+     << PrettySize(region_space_->GetNumRegions() * space::RegionSpace::kRegionSize)
+     << ")\n";
 }
 
 }  // namespace collector
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 8b4b58e..939e7fc 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -308,6 +308,11 @@
   Atomic<uint64_t> cumulative_bytes_moved_;
   Atomic<uint64_t> cumulative_objects_moved_;
 
+  // Maintain the maximum number of non-free regions, sampled just before
+  // regions are reclaimed in each GC cycle. At that point in the cycle,
+  // the number of non-free regions is at its highest.
+  size_t max_peak_num_non_free_regions_;
+
   // The skipped blocks are memory blocks/chunks that were copies of
   // objects that were unused due to lost races (CAS failures) at
   // object copy/forward pointer install. They are reused.
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 77d76fb..c9c9136 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -138,6 +138,13 @@
   uint64_t GetObjectsAllocatedInUnevacFromSpace() REQUIRES(!region_lock_) {
     return GetObjectsAllocatedInternal<RegionType::kRegionTypeUnevacFromSpace>();
   }
+  // It is OK to do a racy read here as it is only for the performance dump.
+  size_t GetNumNonFreeRegions() const {
+    return num_non_free_regions_;
+  }
+  size_t GetNumRegions() const {
+    return num_regions_;
+  }
 
   bool CanMoveObjects() const OVERRIDE {
     return true;