[metrics] Add histogram statistics for reporting In order to meet feature parity with the existing GC histograms, this CL adds the ability to estimate percentiles and confidence intervals from histogram data. This is currently exposed only to metrics backends, which can choose to compute these summary statistics if desired. The human-readable stream backend does this. Example output: JitMethodCompileTime: range = 0...1000000 99% confidence interval: 333...66332 buckets: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 Bug: 178034939 Test: m test-art-host-gtest-art_runtime_tests Change-Id: Ia0f8f53419018583af20029c4c9e1ea9d59a9985

commit: 3a1ca400758302f44a95ae8255ca7c012d651d73 [log] [tgz]
author: Eric Holk <eholk@google.com> Thu Jan 21 23:10:09 2021 +0000
committer: Treehugger Robot <treehugger-gerrit@google.com> Fri Jan 29 19:58:12 2021 +0000
tree: 9b208b6a6e1f4e0917fdf6b567907c8974d3e1be
parent: e77141a9f2c8d44866c02fe726ac0449984ebe10 [diff]
diff --git a/runtime/metrics/metrics.cc b/runtime/metrics/metrics.cc
index 60add77..f0b2421 100644
--- a/runtime/metrics/metrics.cc
+++ b/runtime/metrics/metrics.cc

@@ -85,6 +85,66 @@
   os << "\n*** Done dumping ART internal metrics ***\n";
 }
 
+std::vector<uint32_t> MetricsBackend::CumulativeBuckets(
+    const std::vector<uint32_t>& buckets) const {
+  std::vector<uint32_t> cumulative_buckets(buckets.size() + 1);
+  uint32_t total_count = 0;
+  size_t i = 0;
+  for (auto& bucket : buckets) {
+    cumulative_buckets[i++] = bucket + total_count;
+    total_count += bucket;
+  }
+  cumulative_buckets[i] = total_count;
+  return cumulative_buckets;
+}
+
+int64_t MetricsBackend::HistogramPercentile(double percentile,
+                                            int64_t minimum_value,
+                                            int64_t maximum_value,
+                                            const std::vector<uint32_t>& cumulative_buckets) const {
+  const uint32_t total_count = cumulative_buckets[cumulative_buckets.size() - 1];
+  // Find which bucket has the right percentile
+  const uint32_t percentile_count = static_cast<uint32_t>(percentile * total_count);
+  size_t bucket_index;
+  // We could use a binary search here, but that complicates the code and linear search is usually
+  // faster for up to 100 elements, and our histograms should normally have less than 100 buckets.
+  for (bucket_index = 0; bucket_index < cumulative_buckets.size(); bucket_index++) {
+    if (cumulative_buckets[bucket_index] > percentile_count) {
+      break;
+    }
+  }
+  // Find the bounds in both count and percentile of the bucket we landed in.
+  const uint32_t lower_count = bucket_index > 0 ? cumulative_buckets[bucket_index] : 0;
+  const uint32_t upper_count = cumulative_buckets[bucket_index];
+
+  const double lower_percentile = static_cast<double>(lower_count) / total_count;
+  const double upper_percentile = static_cast<double>(upper_count) / total_count;
+  const double width_percentile = upper_percentile - lower_percentile;
+
+  // Compute what values the bucket covers
+  const size_t num_buckets = cumulative_buckets.size() - 1;
+  const int64_t bucket_width = (maximum_value - minimum_value) / static_cast<int64_t>(num_buckets);
+  const int64_t lower_value = bucket_width * static_cast<int64_t>(bucket_index);
+  const int64_t upper_value = lower_value + bucket_width;
+
+  // Now linearly interpolate a value between lower_value and upper_value.
+  const double in_bucket_location = (percentile - lower_percentile) / width_percentile;
+  return static_cast<int64_t>(static_cast<double>(lower_value + upper_value) * in_bucket_location);
+}
+
+std::pair<int64_t, int64_t> MetricsBackend::HistogramConfidenceInterval(
+    double interval,
+    int64_t minimum_value,
+    int64_t maximum_value,
+    const std::vector<uint32_t>& cumulative_buckets) const {
+  const double lower_percentile = (1.0 - interval) / 2.0;
+  const double upper_percentile = lower_percentile + interval;
+
+  return std::make_pair(
+      HistogramPercentile(lower_percentile, minimum_value, maximum_value, cumulative_buckets),
+      HistogramPercentile(upper_percentile, minimum_value, maximum_value, cumulative_buckets));
+}
+
 StreamBackend::StreamBackend(std::ostream& os) : os_{os} {}
 
 void StreamBackend::BeginSession([[maybe_unused]] const SessionData& session_data) {
@@ -100,12 +160,19 @@
 }
 
 void StreamBackend::ReportHistogram(DatumId histogram_type,
-                                    int64_t minimum_value_,
-                                    int64_t maximum_value_,
+                                    int64_t minimum_value,
+                                    int64_t maximum_value,
                                     const std::vector<uint32_t>& buckets) {
-  os_ << DatumName(histogram_type) << ": range = " << minimum_value_ << "..." << maximum_value_;
+  os_ << DatumName(histogram_type) << ": range = " << minimum_value << "..." << maximum_value
+      << "\n";
+  std::vector<uint32_t> cumulative_buckets = CumulativeBuckets(buckets);
+  std::pair<int64_t, int64_t> confidence_interval = HistogramConfidenceInterval(
+      /*interval=*/0.99, minimum_value, maximum_value, cumulative_buckets);
+  os_ << "  99% confidence interval: " << confidence_interval.first << "..."
+      << confidence_interval.second << "\n";
+
   if (buckets.size() > 0) {
-    os_ << ", buckets: ";
+    os_ << "  buckets: ";
     bool first = true;
     for (const auto& count : buckets) {
       if (!first) {

diff --git a/runtime/metrics/metrics.h b/runtime/metrics/metrics.h
index 36948dd..1c03394 100644
--- a/runtime/metrics/metrics.h
+++ b/runtime/metrics/metrics.h

@@ -76,7 +76,7 @@
 #undef ART_COUNTER
 
 #define ART_HISTOGRAM(name, num_buckets, low_value, high_value) k##name,
-  ART_HISTOGRAMS(ART_HISTOGRAM)
+      ART_HISTOGRAMS(ART_HISTOGRAM)
 #undef ART_HISTOGRAM
 };
 
@@ -130,6 +130,36 @@
                                int64_t maximum_value,
                                const std::vector<uint32_t>& buckets) = 0;
 
+  ///////////////////////
+  // Utility Functions //
+  ///////////////////////
+  //
+  // The functions below are utility functions that could be helpful for other backends. For
+  // example, these can calculate various statistics about histograms that could be helpful to
+  // report.
+
+  // Returns an array where each entry is the total number of entries up to and including the
+  // current bucket. There is one extra item at the end which is the sum of all of the original
+  // buckets.
+  std::vector<uint32_t> CumulativeBuckets(const std::vector<uint32_t>& buckets) const;
+
+  // Estimates the value that would be at given percentile.
+  //
+  // This assumes that values are uniformly distributed within a bucket, which is probably not
+  // a valid assumption but it's the best we can do with the data we collect.
+  int64_t HistogramPercentile(double percentile,
+                              int64_t minimum_value,
+                              int64_t maximum_value,
+                              const std::vector<uint32_t>& cumulative_buckets) const;
+
+  // Returns a pair of the lower and upper bound of the confidence interval specified by the
+  // interval parameter.
+  std::pair<int64_t, int64_t> HistogramConfidenceInterval(
+      double interval,
+      int64_t minimum_value,
+      int64_t maximum_value,
+      const std::vector<uint32_t>& cumulative_buckets) const;
+
   template <DatumId counter_type>
   friend class MetricsCounter;
   template <DatumId histogram_type, size_t num_buckets, int64_t low_value, int64_t high_value>
@@ -412,7 +442,6 @@
   MessageQueue<ShutdownRequestedMessage> messages_;
 };
 
-
 }  // namespace metrics
 }  // namespace art
 

diff --git a/runtime/metrics/metrics_test.cc b/runtime/metrics/metrics_test.cc
index f568ea0..79684e9 100644
--- a/runtime/metrics/metrics_test.cc
+++ b/runtime/metrics/metrics_test.cc

@@ -214,8 +214,7 @@
   metrics.ReportAllMetrics(&backend);
 
   // Make sure the resulting string lists all the counters.
-#define COUNTER(name) \
-  EXPECT_NE(os.str().find(DatumName(DatumId::k##name)), std::string::npos)
+#define COUNTER(name) EXPECT_NE(os.str().find(DatumName(DatumId::k##name)), std::string::npos)
   ART_COUNTERS(COUNTER);
 #undef COUNTER
 
@@ -226,6 +225,41 @@
 #undef HISTOGRAM
 }
 
+TEST_F(MetricsTest, HistogramPercentileAndConfidenceIntervale) {
+  ArtMetrics metrics;
+
+  // Declare a backend
+  class TestBackend : public TestBackendBase {
+   public:
+    const int64_t high_value = 42000;
+    const int64_t low_value = 1;
+
+    void ReportHistogram(DatumId,
+                         int64_t minimum_value,
+                         int64_t maximum_value,
+                         const std::vector<uint32_t>& buckets) override {
+      const auto cumulative_buckets{CumulativeBuckets(buckets)};
+
+      // The 50th percentile should be below the largest value and above the highest value we added.
+      const int64_t percentile =
+          HistogramPercentile(0.50, minimum_value, maximum_value, cumulative_buckets);
+      EXPECT_LT(percentile, high_value);
+      EXPECT_GT(percentile, low_value);
+
+      // Some basic reasonableness checks for confidence intervals.
+      const auto interval =
+          HistogramConfidenceInterval(0.95, minimum_value, maximum_value, cumulative_buckets);
+      EXPECT_LT(interval.first, interval.second);
+    }
+  } backend;
+
+  // Collect some data
+  metrics.JitMethodCompileTime()->Add(backend.high_value);
+  metrics.JitMethodCompileTime()->Add(backend.low_value);
+
+  metrics.ReportAllMetrics(&backend);
+}
+
 }  // namespace metrics
 }  // namespace art
commit	3a1ca400758302f44a95ae8255ca7c012d651d73	[log] [tgz]
author	Eric Holk <eholk@google.com>	Thu Jan 21 23:10:09 2021 +0000
committer	Treehugger Robot <treehugger-gerrit@google.com>	Fri Jan 29 19:58:12 2021 +0000
tree	9b208b6a6e1f4e0917fdf6b567907c8974d3e1be
parent	e77141a9f2c8d44866c02fe726ac0449984ebe10 [diff]