ART: add post gc weighted allocated bytes metrics
Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I2fc30c7dbb2d12ab8c3f98ad1f96672af6c5d438
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 46ff7dc..2ef3d92 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -90,13 +90,14 @@
Thread* self = Thread::Current();
uint64_t start_time = NanoTime();
uint64_t thread_cpu_start_time = ThreadCpuNanoTime();
- GetHeap()->CalculateWeightedAllocatedBytes();
+ GetHeap()->CalculatePreGcWeightedAllocatedBytes();
Iteration* current_iteration = GetCurrentIteration();
current_iteration->Reset(gc_cause, clear_soft_references);
// Note transaction mode is single-threaded and there's no asynchronous GC and this flag doesn't
// change in the middle of a GC.
is_transaction_active_ = Runtime::Current()->IsActiveTransaction();
RunPhases(); // Run all the GC phases.
+ GetHeap()->CalculatePostGcWeightedAllocatedBytes();
// Add the current timings to the cumulative timings.
cumulative_timings_.AddLogger(*GetTimings());
// Update cumulative statistics with how many bytes the GC iteration freed.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bfb1019..dc79731 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -214,8 +214,10 @@
long_pause_log_threshold_(long_pause_log_threshold),
long_gc_log_threshold_(long_gc_log_threshold),
process_cpu_start_time_ns_(ProcessCpuNanoTime()),
- last_process_cpu_time_ns_(process_cpu_start_time_ns_),
- weighted_allocated_bytes_(0.0),
+ pre_gc_last_process_cpu_time_ns_(process_cpu_start_time_ns_),
+ post_gc_last_process_cpu_time_ns_(process_cpu_start_time_ns_),
+ pre_gc_weighted_allocated_bytes_(0.0),
+ post_gc_weighted_allocated_bytes_(0.0),
ignore_max_footprint_(ignore_max_footprint),
zygote_creation_lock_("zygote creation lock", kZygoteCreationLock),
zygote_space_(nullptr),
@@ -1070,12 +1072,25 @@
}
}
-void Heap::CalculateWeightedAllocatedBytes() {
- uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+double Heap::CalculateGcWeightedAllocatedBytes(uint64_t gc_last_process_cpu_time_ns,
+ uint64_t current_process_cpu_time) const {
uint64_t bytes_allocated = GetBytesAllocated();
- double weight = current_process_cpu_time - last_process_cpu_time_ns_;
- weighted_allocated_bytes_ += weight * bytes_allocated;
- last_process_cpu_time_ns_ = current_process_cpu_time;
+ double weight = current_process_cpu_time - gc_last_process_cpu_time_ns;
+ return weight * bytes_allocated;
+}
+
+void Heap::CalculatePreGcWeightedAllocatedBytes() {
+ uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+ pre_gc_weighted_allocated_bytes_ +=
+ CalculateGcWeightedAllocatedBytes(pre_gc_last_process_cpu_time_ns_, current_process_cpu_time);
+ pre_gc_last_process_cpu_time_ns_ = current_process_cpu_time;
+}
+
+void Heap::CalculatePostGcWeightedAllocatedBytes() {
+ uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+ post_gc_weighted_allocated_bytes_ +=
+ CalculateGcWeightedAllocatedBytes(post_gc_last_process_cpu_time_ns_, current_process_cpu_time);
+ post_gc_last_process_cpu_time_ns_ = current_process_cpu_time;
}
uint64_t Heap::GetTotalGcCpuTime() {
@@ -1157,8 +1172,12 @@
}
process_cpu_start_time_ns_ = ProcessCpuNanoTime();
- last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
- weighted_allocated_bytes_ = 0u;
+
+ pre_gc_last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
+ pre_gc_weighted_allocated_bytes_ = 0u;
+
+ post_gc_last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
+ post_gc_weighted_allocated_bytes_ = 0u;
total_bytes_freed_ever_ = 0;
total_objects_freed_ever_ = 0;
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 57c7376..504eff2 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -397,11 +397,16 @@
REQUIRES(!Locks::heap_bitmap_lock_)
REQUIRES(Locks::mutator_lock_);
- double GetWeightedAllocatedBytes() const {
- return weighted_allocated_bytes_;
+ double GetPreGcWeightedAllocatedBytes() const {
+ return pre_gc_weighted_allocated_bytes_;
}
- void CalculateWeightedAllocatedBytes();
+ double GetPostGcWeightedAllocatedBytes() const {
+ return post_gc_weighted_allocated_bytes_;
+ }
+
+ void CalculatePreGcWeightedAllocatedBytes();
+ void CalculatePostGcWeightedAllocatedBytes();
uint64_t GetTotalGcCpuTime();
uint64_t GetProcessCpuStartTime() const {
@@ -858,6 +863,9 @@
REQUIRES(!*gc_complete_lock_);
void FinishGC(Thread* self, collector::GcType gc_type) REQUIRES(!*gc_complete_lock_);
+ double CalculateGcWeightedAllocatedBytes(uint64_t gc_last_process_cpu_time_ns,
+ uint64_t current_process_cpu_time) const;
+
// Create a mem map with a preferred base address.
static MemMap MapAnonymousPreferredAddress(const char* name,
uint8_t* request_begin,
@@ -1175,11 +1183,14 @@
// Starting time of the new process; meant to be used for measuring total process CPU time.
uint64_t process_cpu_start_time_ns_;
- // Last time GC started; meant to be used to measure the duration between two GCs.
- uint64_t last_process_cpu_time_ns_;
+ // Last time (before and after) GC started; meant to be used to measure the
+ // duration between two GCs.
+ uint64_t pre_gc_last_process_cpu_time_ns_;
+ uint64_t post_gc_last_process_cpu_time_ns_;
- // allocated_bytes * (current_process_cpu_time - last_process_cpu_time)
- double weighted_allocated_bytes_;
+ // allocated_bytes * (current_process_cpu_time - [pre|post]_gc_last_process_cpu_time)
+ double pre_gc_weighted_allocated_bytes_;
+ double post_gc_weighted_allocated_bytes_;
// If we ignore the max footprint it lets the heap grow until it hits the heap capacity, this is
// useful for benchmarking since it reduces time spent in GC to a low %.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 69ef2fb..d2c915e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -320,7 +320,8 @@
}
if (dump_gc_performance_on_shutdown_) {
- heap_->CalculateWeightedAllocatedBytes();
+ heap_->CalculatePreGcWeightedAllocatedBytes();
+ heap_->CalculatePostGcWeightedAllocatedBytes();
uint64_t process_cpu_end_time = ProcessCpuNanoTime();
ScopedLogSeverity sls(LogSeverity::INFO);
// This can't be called from the Heap destructor below because it
@@ -335,9 +336,15 @@
<< " out of process CPU time " << PrettyDuration(process_cpu_time)
<< " (" << ratio << ")"
<< "\n";
- double weighted_allocated_bytes = heap_->GetWeightedAllocatedBytes() / process_cpu_time;
- LOG_STREAM(INFO) << "Weighted bytes allocated over CPU time: "
- << " (" << PrettySize(weighted_allocated_bytes) << ")"
+ double pre_gc_weighted_allocated_bytes =
+ heap_->GetPreGcWeightedAllocatedBytes() / process_cpu_time;
+ double post_gc_weighted_allocated_bytes =
+ heap_->GetPostGcWeightedAllocatedBytes() / process_cpu_time;
+
+ LOG_STREAM(INFO) << "Pre GC weighted bytes allocated over CPU time: "
+ << " (" << PrettySize(pre_gc_weighted_allocated_bytes) << ")";
+ LOG_STREAM(INFO) << "Post GC weighted bytes allocated over CPU time: "
+ << " (" << PrettySize(post_gc_weighted_allocated_bytes) << ")"
<< "\n";
}