ART: Add reclaimed bytes ratio metric to GC performance dump
Record the reclaimed bytes ratio of each GC cycle (bytes reclaimed by the GC
divided by bytes allocated before it) and print its average over all GC
cycles if DumpGCPerformanceOnShutdown is set.
Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I306e86c52102ab06d5279705ebc9e35b22b6b748
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index aba1c5a..fefe9ab 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -95,6 +95,7 @@
weak_ref_access_enabled_(true),
copied_live_bytes_ratio_sum_(0.f),
gc_count_(0),
+ reclaimed_bytes_ratio_sum_(0.f),
young_gen_(young_gen),
skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
@@ -110,7 +111,8 @@
force_evacuate_all_(false),
gc_grays_immune_objects_(false),
immune_gray_stack_lock_("concurrent copying immune gray stack lock",
- kMarkSweepMarkStackLock) {
+ kMarkSweepMarkStackLock),
+ num_bytes_allocated_before_gc_(0) {
static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
"The region space size and the read barrier table region size must match");
CHECK(kEnableGenerationalConcurrentCopyingCollection || !young_gen_);
@@ -323,6 +325,7 @@
void ConcurrentCopying::InitializePhase() {
TimingLogger::ScopedTiming split("InitializePhase", GetTimings());
+ num_bytes_allocated_before_gc_ = static_cast<int64_t>(heap_->GetBytesAllocated());
if (kVerboseMode) {
LOG(INFO) << "GC InitializePhase";
LOG(INFO) << "Region-space : " << reinterpret_cast<void*>(region_space_->Begin()) << "-"
@@ -2091,6 +2094,11 @@
CheckEmptyMarkStack();
+ int64_t num_bytes_allocated_after_gc = static_cast<int64_t>(heap_->GetBytesAllocated());
+ int64_t diff = num_bytes_allocated_before_gc_ - num_bytes_allocated_after_gc;
+ auto ratio = static_cast<float>(diff) / num_bytes_allocated_before_gc_;
+ reclaimed_bytes_ratio_sum_ += ratio;
+
if (kVerboseMode) {
LOG(INFO) << "GC end of ReclaimPhase";
}
@@ -3199,6 +3207,7 @@
void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
GarbageCollector::DumpPerformanceInfo(os);
+ size_t num_gc_cycles = GetCumulativeTimings().GetIterations();
MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
if (rb_slow_path_time_histogram_.SampleSize() > 0) {
Histogram<uint64_t>::CumulativeData cumulative_data;
@@ -3211,15 +3220,15 @@
if (rb_slow_path_count_gc_total_ > 0) {
os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
}
- float average_ratio = copied_live_bytes_ratio_sum_ / gc_count_;
- if (young_gen_) {
- os << "Average minor GC copied live bytes ratio "
- << average_ratio << " over " << gc_count_ << " minor GCs\n";
- } else {
- os << "Average major GC copied live bytes ratio "
- << average_ratio << " over " << gc_count_ << " major GCs\n";
- }
+ os << "Average " << (young_gen_ ? "minor" : "major") << " GC reclaim bytes ratio "
+ << (reclaimed_bytes_ratio_sum_ / num_gc_cycles) << " over " << num_gc_cycles
+ << " GC cycles\n";
+
+ os << "Average " << (young_gen_ ? "minor" : "major") << " GC copied live bytes ratio "
+ << (copied_live_bytes_ratio_sum_ / gc_count_) << " over " << gc_count_
+ << " " << (young_gen_ ? "minor" : "major") << " GCs\n";
+
os << "Cumulative bytes moved "
<< cumulative_bytes_moved_.load(std::memory_order_relaxed) << "\n";
os << "Cumulative objects moved "
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index cd086c4..6535b11 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -359,10 +359,12 @@
Atomic<uint64_t> cumulative_bytes_moved_;
Atomic<uint64_t> cumulative_objects_moved_;
- // copied_live_bytes_ratio_sum_ and gc_count_ are read and written by CC per
- // GC, in ReclaimPhase, and are read by DumpPerformanceInfo (potentially from
- // another thread). However, at present, DumpPerformanceInfo is only called
- // when the runtime shuts down, so no concurrent access.
+ // copied_live_bytes_ratio_sum_ is read and written by CC per GC, in
+ // ReclaimPhase, and is read by DumpPerformanceInfo (potentially from another
+ // thread). However, at present, DumpPerformanceInfo is only called when the
+ // runtime shuts down, so there is no concurrent access. The same reasoning
+ // applies to gc_count_ and reclaimed_bytes_ratio_sum_.
+
// The sum of of all copied live bytes ratio (to_bytes/from_bytes)
float copied_live_bytes_ratio_sum_;
// The number of GC counts, used to calculate the average above. (It doesn't
@@ -371,6 +373,9 @@
// space.)
size_t gc_count_;
+ // Sum over all GC cycles of reclaimed_bytes / bytes_allocated_before_gc.
+ float reclaimed_bytes_ratio_sum_;
+
// Generational "sticky", only trace through dirty objects in region space.
const bool young_gen_;
// If true, the GC thread is done scanning marked objects on dirty and aged
@@ -416,6 +421,9 @@
// ConcurrentCopying::SweepArray).
MemMap sweep_array_free_buffer_mem_map_;
+ // Bytes allocated when the GC starts; signed, as the count after the GC may be larger.
+ int64_t num_bytes_allocated_before_gc_;
+
class ActivateReadBarrierEntrypointsCallback;
class ActivateReadBarrierEntrypointsCheckpoint;
class AssertToSpaceInvariantFieldVisitor;