[metrics] Add total-gc-time and adjust other metrics
This change introduces the following:
1) Added total-gc-collection-time in ms as a counter so that, at any time,
we can know how much time is spent in GC.
2) Added MetricsAverage to report average of the given data point.
3) Changed mutator-pause-time to an average, which makes more sense. It is
also renamed to world-stop-time.
4) Added averages of gc-throughputs.
5) Removed gc-meta-data-size metric as we don't capture it normally due
to the high cost of capturing this data.
Test: Observe pitot data
Bug: 191404436
Change-Id: I9da7f8c588ac4b42414beedb1b4004e0ac4b5fc2
diff --git a/libartbase/base/metrics/metrics.h b/libartbase/base/metrics/metrics.h
index 78a6387..c9110ee 100644
--- a/libartbase/base/metrics/metrics.h
+++ b/libartbase/base/metrics/metrics.h
@@ -36,22 +36,26 @@
#pragma clang diagnostic error "-Wconversion"
// See README.md in this directory for how to define metrics.
-#define ART_METRICS(METRIC) \
- METRIC(ClassLoadingTotalTime, MetricsCounter) \
- METRIC(ClassVerificationTotalTime, MetricsCounter) \
- METRIC(ClassVerificationCount, MetricsCounter) \
- METRIC(MutatorPauseTimeDuringGC, MetricsCounter) \
- METRIC(YoungGcCount, MetricsCounter) \
- METRIC(FullGcCount, MetricsCounter) \
- METRIC(TotalBytesAllocated, MetricsCounter) \
- METRIC(TotalGcMetaDataSize, MetricsCounter) \
- METRIC(JitMethodCompileTime, MetricsHistogram, 15, 0, 1'000'000) \
- METRIC(JitMethodCompileCount, MetricsCounter) \
- METRIC(YoungGcCollectionTime, MetricsHistogram, 15, 0, 60'000) \
- METRIC(FullGcCollectionTime, MetricsHistogram, 15, 0, 60'000) \
- METRIC(YoungGcThroughput, MetricsHistogram, 15, 0, 10'000) \
- METRIC(FullGcThroughput, MetricsHistogram, 15, 0, 10'000) \
- METRIC(YoungGcTracingThroughput, MetricsHistogram, 15, 0, 10'000) \
+#define ART_METRICS(METRIC) \
+ METRIC(ClassLoadingTotalTime, MetricsCounter) \
+ METRIC(ClassVerificationTotalTime, MetricsCounter) \
+ METRIC(ClassVerificationCount, MetricsCounter) \
+ METRIC(WorldStopTimeDuringGCAvg, MetricsAverage) \
+ METRIC(YoungGcCount, MetricsCounter) \
+ METRIC(FullGcCount, MetricsCounter) \
+ METRIC(TotalBytesAllocated, MetricsCounter) \
+ METRIC(TotalGcCollectionTime, MetricsCounter) \
+ METRIC(YoungGcThroughputAvg, MetricsAverage) \
+ METRIC(FullGcThroughputAvg, MetricsAverage) \
+ METRIC(YoungGcTracingThroughputAvg, MetricsAverage) \
+ METRIC(FullGcTracingThroughputAvg, MetricsAverage) \
+ METRIC(JitMethodCompileTime, MetricsHistogram, 15, 0, 1'000'000) \
+ METRIC(JitMethodCompileCount, MetricsCounter) \
+ METRIC(YoungGcCollectionTime, MetricsHistogram, 15, 0, 60'000) \
+ METRIC(FullGcCollectionTime, MetricsHistogram, 15, 0, 60'000) \
+ METRIC(YoungGcThroughput, MetricsHistogram, 15, 0, 10'000) \
+ METRIC(FullGcThroughput, MetricsHistogram, 15, 0, 10'000) \
+ METRIC(YoungGcTracingThroughput, MetricsHistogram, 15, 0, 10'000) \
METRIC(FullGcTracingThroughput, MetricsHistogram, 15, 0, 10'000)
// A lot of the metrics implementation code is generated by passing one-off macros into ART_COUNTERS
@@ -262,6 +266,8 @@
friend class MetricsHistogram;
template <DatumId datum_id, typename T, const T& AccumulatorFunction(const T&, const T&)>
friend class MetricsAccumulator;
+ template <DatumId datum_id, typename T>
+ friend class MetricsAverage;
friend class ArtMetrics;
};
@@ -273,7 +279,7 @@
};
template <DatumId counter_type, typename T = uint64_t>
-class MetricsCounter final : public MetricsBase<T> {
+class MetricsCounter : public MetricsBase<T> {
public:
using value_t = T;
explicit constexpr MetricsCounter(uint64_t value = 0) : value_{value} {
@@ -294,15 +300,62 @@
value_ = 0;
}
- private:
value_t Value() const { return value_.load(std::memory_order::memory_order_relaxed); }
+ private:
std::atomic<value_t> value_;
static_assert(std::atomic<value_t>::is_always_lock_free);
friend class ArtMetrics;
};
+template <DatumId datum_id, typename T = uint64_t>
+class MetricsAverage final : public MetricsCounter<datum_id, T> {
+ public:
+ using value_t = T;
+ using count_t = T;
+ explicit constexpr MetricsAverage(uint64_t value = 0, uint64_t count = 0) :
+ MetricsCounter<datum_id, value_t>(value), count_(count) {
+ // Ensure we do not have any unnecessary data in this class.
+ // Adding intptr_t to accommodate vtable, and rounding up to incorporate
+ // padding.
+ static_assert(RoundUp(sizeof(*this), sizeof(uint64_t))
+ == RoundUp(sizeof(intptr_t) + sizeof(value_t) + sizeof(count_t),
+ sizeof(uint64_t)));
+ }
+
+ // We use release memory-order here and then acquire in Report() to ensure
+ // that at least the non-racy reads/writes to this metric are consistent. This
+ // doesn't guarantee the atomicity of the change to both fields, but that
+ // may not be desired because:
+ // 1. The metric eventually becomes consistent.
+ // 2. For sufficiently large count_, a few data points which are off shouldn't
+ // make a huge difference to the reporter.
+ void Add(value_t value) {
+ MetricsCounter<datum_id, value_t>::Add(value);
+ count_.fetch_add(1, std::memory_order::memory_order_release);
+ }
+
+ void Report(MetricsBackend* backend) const {
+ count_t count = count_.load(std::memory_order::memory_order_acquire);
+ backend->ReportCounter(datum_id,
+ // Avoid divide-by-0.
+ count != 0 ? MetricsCounter<datum_id, value_t>::Value() / count : 0);
+ }
+
+ protected:
+ void Reset() {
+ count_ = 0;
+ MetricsCounter<datum_id, value_t>::Reset();
+ }
+
+ private:
+ std::atomic<count_t> count_;
+ static_assert(std::atomic<count_t>::is_always_lock_free);
+
+ friend class ArtMetrics;
+};
+
template <DatumId histogram_type_,
size_t num_buckets_,
int64_t minimum_value_,
diff --git a/libartbase/base/metrics/metrics_test.cc b/libartbase/base/metrics/metrics_test.cc
index eae9367..77f1b97 100644
--- a/libartbase/base/metrics/metrics_test.cc
+++ b/libartbase/base/metrics/metrics_test.cc
@@ -110,6 +110,26 @@
EXPECT_EQ(CounterValue(accumulator), kMaxValue);
}
+TEST_F(MetricsTest, AverageMetric) {
+ MetricsAverage<DatumId::kClassLoadingTotalTime, uint64_t> avg;
+
+ std::vector<std::thread> threads;
+
+ constexpr uint64_t kMaxValue = 100;
+
+ for (uint64_t i = 0; i <= kMaxValue; i++) {
+ threads.emplace_back(std::thread{[&avg, i]() {
+ avg.Add(i);
+ }});
+ }
+
+ for (auto& thread : threads) {
+ thread.join();
+ }
+
+ EXPECT_EQ(CounterValue(avg), (kMaxValue + 1) / 2);
+}
+
TEST_F(MetricsTest, DatumName) {
EXPECT_EQ("ClassVerificationTotalTime", DatumName(DatumId::kClassVerificationTotalTime));
}
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 96c792b..538d7be 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -156,16 +156,21 @@
// Return type of these functions are different. And even though the base class
// is same, using ternary operator complains.
metrics::ArtMetrics* metrics = GetMetrics();
+ are_metrics_initialized_ = true;
if (young_gen_) {
gc_time_histogram_ = metrics->YoungGcCollectionTime();
metrics_gc_count_ = metrics->YoungGcCount();
gc_throughput_histogram_ = metrics->YoungGcThroughput();
gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput();
+ gc_throughput_avg_ = metrics->YoungGcThroughputAvg();
+ gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg();
} else {
gc_time_histogram_ = metrics->FullGcCollectionTime();
metrics_gc_count_ = metrics->FullGcCount();
gc_throughput_histogram_ = metrics->FullGcThroughput();
gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
+ gc_throughput_avg_ = metrics->FullGcThroughputAvg();
+ gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
}
}
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 70aec9a..80b3982 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -74,9 +74,12 @@
metrics_gc_count_(nullptr),
gc_throughput_histogram_(nullptr),
gc_tracing_throughput_hist_(nullptr),
+ gc_throughput_avg_(nullptr),
+ gc_tracing_throughput_avg_(nullptr),
cumulative_timings_(name),
pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true),
- is_transaction_active_(false) {
+ is_transaction_active_(false),
+ are_metrics_initialized_(false) {
ResetCumulativeStatistics();
}
@@ -145,10 +148,6 @@
}
rss *= kPageSize;
rss_histogram_.AddValue(rss / KB);
- // Add up all the GC meta-data in bytes. It doesn't make sense in itself as it
- // cannot be accumulated over GCs. So must be divided by gc-count while
- // reporting.
- GetMetrics()->TotalGcMetaDataSize()->Add(rss);
#endif
return rss;
}
@@ -190,10 +189,6 @@
RegisterPause(duration_ns);
}
total_time_ns_ += duration_ns;
- if (gc_time_histogram_ != nullptr) {
- // Report GC time in milliseconds.
- gc_time_histogram_->Add(NsToMs(duration_ns));
- }
uint64_t total_pause_time = 0;
for (uint64_t pause_time : current_iteration->GetPauseTimes()) {
MutexLock mu(self, pause_histogram_lock_);
@@ -202,22 +197,25 @@
}
metrics::ArtMetrics* metrics = runtime->GetMetrics();
// Report STW pause time in microseconds.
- metrics->MutatorPauseTimeDuringGC()->Add(total_pause_time / 1'000);
- if (metrics_gc_count_ != nullptr) {
+ metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time / 1'000);
+ // Report total collection time of all GCs put together.
+ metrics->TotalGcCollectionTime()->Add(NsToMs(duration_ns));
+ if (are_metrics_initialized_) {
metrics_gc_count_->Add(1);
- }
-
- if (gc_tracing_throughput_hist_ != nullptr) {
+ // Report GC time in milliseconds.
+ gc_time_histogram_->Add(NsToMs(duration_ns));
// Throughput in bytes/s. Add 1us to prevent possible division by 0.
uint64_t throughput = (current_iteration->GetScannedBytes() * 1'000'000)
/ (NsToUs(duration_ns) + 1);
// Report in MB/s.
- gc_tracing_throughput_hist_->Add(throughput / MB);
- }
+ throughput /= MB;
+ gc_tracing_throughput_hist_->Add(throughput);
+ gc_tracing_throughput_avg_->Add(throughput);
- if (gc_throughput_histogram_ != nullptr) {
// Report GC throughput in MB/s.
- gc_throughput_histogram_->Add(current_iteration->GetEstimatedThroughput() / MB);
+ throughput = current_iteration->GetEstimatedThroughput() / MB;
+ gc_throughput_histogram_->Add(throughput);
+ gc_throughput_avg_->Add(throughput);
}
is_transaction_active_ = false;
}
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index d2b1170..d439914 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -164,6 +164,8 @@
metrics::MetricsBase<uint64_t>* metrics_gc_count_;
metrics::MetricsBase<int64_t>* gc_throughput_histogram_;
metrics::MetricsBase<int64_t>* gc_tracing_throughput_hist_;
+ metrics::MetricsBase<uint64_t>* gc_throughput_avg_;
+ metrics::MetricsBase<uint64_t>* gc_tracing_throughput_avg_;
uint64_t total_thread_cpu_time_ns_;
uint64_t total_time_ns_;
uint64_t total_freed_objects_;
@@ -172,6 +174,10 @@
CumulativeLogger cumulative_timings_;
mutable Mutex pause_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
bool is_transaction_active_;
+ // The garbage collector algorithms will either have all the metrics pointers
+ // (above) initialized, or none of them. So instead of checking each time, we
+ // use this flag.
+ bool are_metrics_initialized_;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(GarbageCollector);
diff --git a/runtime/metrics/statsd.cc b/runtime/metrics/statsd.cc
index 7e74cf6..725a965 100644
--- a/runtime/metrics/statsd.cc
+++ b/runtime/metrics/statsd.cc
@@ -52,9 +52,9 @@
case DatumId::kClassVerificationCount:
return std::make_optional(
statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_CLASS_VERIFICATION_COUNT);
- case DatumId::kMutatorPauseTimeDuringGC:
+ case DatumId::kWorldStopTimeDuringGCAvg:
return std::make_optional(
- statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_MUTATOR_PAUSE_TIME_COUNTER_MICROS);
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_WORLD_STOP_TIME_AVG_MICROS);
case DatumId::kYoungGcCount:
return std::make_optional(
statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_YOUNG_GENERATION_COLLECTION_COUNT);
@@ -64,9 +64,6 @@
case DatumId::kTotalBytesAllocated:
return std::make_optional(
statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_TOTAL_BYTES_ALLOCATED);
- case DatumId::kTotalGcMetaDataSize:
- return std::make_optional(
- statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_TOTAL_METADATA_SIZE_BYTES);
case DatumId::kYoungGcCollectionTime:
return std::make_optional(
statsd::
@@ -93,6 +90,21 @@
return std::make_optional(
statsd::
ART_DATUM_REPORTED__KIND__ART_DATUM_GC_FULL_HEAP_TRACING_THROUGHPUT_HISTO_MB_PER_SEC);
+ case DatumId::kTotalGcCollectionTime:
+ return std::make_optional(
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_TOTAL_COLLECTION_TIME_MS);
+ case DatumId::kYoungGcThroughputAvg:
+ return std::make_optional(
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_YOUNG_GENERATION_COLLECTION_THROUGHPUT_AVG_MB_PER_SEC);
+ case DatumId::kFullGcThroughputAvg:
+ return std::make_optional(
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_FULL_HEAP_COLLECTION_THROUGHPUT_AVG_MB_PER_SEC);
+ case DatumId::kYoungGcTracingThroughputAvg:
+ return std::make_optional(
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_YOUNG_GENERATION_TRACING_THROUGHPUT_AVG_MB_PER_SEC);
+ case DatumId::kFullGcTracingThroughputAvg:
+ return std::make_optional(
+ statsd::ART_DATUM_REPORTED__KIND__ART_DATUM_GC_FULL_HEAP_TRACING_THROUGHPUT_AVG_MB_PER_SEC);
}
}