binderThroughputTest: compute percentiles exactly

Instead of using buckets to compute the percentiles, store all of the data and compute the percentiles exactly by sorting. This avoids inaccurate results when the maximum latency is much larger than the average latency: the buckets evenly divide the range up to the estimated maximum latency, so a single slow outlier makes every bucket very wide. For example, with `-w 64 -i 100000 -p -t`, I got an average latency of 0.47ms but a worst latency of 243ms, which puts everything below 1.8ms in the same bucket.

Bug: 293593894
Test: Manually running the benchmark outputs the correct values
Change-Id: Ib215e4fb503e053294525d944f045555aadae97f
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
author: Alice Ryhl <aliceryhl@google.com> 2023-07-28 15:11:11 +0000
committer: Alice Ryhl <aliceryhl@google.com> 2024-01-09 10:04:32 +0000
commit: 4d46cd8051a340474a55053873ba4d9df3725c6b (patch)
tree: 643c2d8d09dc20c227846268e8f16b60efe3f284
parent: b39f05a60c4bd0fa3f71999dc259128eeb19fc08 (diff)
1 files changed, 94 insertions, 84 deletions
diff --git a/libs/binder/tests/binderThroughputTest.cpp b/libs/binder/tests/binderThroughputTest.cpp
index 8f9929585f..d7f631828a 100644
--- a/libs/binder/tests/binderThroughputTest.cpp
+++ b/libs/binder/tests/binderThroughputTest.cpp
@@ -49,6 +49,63 @@ public:
     }
 };
 
+// Latency threshold in nanoseconds; dump() warns when a transaction exceeds it.
+static uint64_t warn_latency = std::numeric_limits<uint64_t>::max();
+
+// Holds every measured transaction latency (nanoseconds) so statistics are exact.
+struct ProcResults {
+    vector<uint64_t> data;
+
+    // Reserves room for `capacity` samples up front.
+    explicit ProcResults(size_t capacity) { data.reserve(capacity); }
+
+    void add_time(uint64_t time) { data.push_back(time); }
+    // Appends all samples of `append` to this object.
+    void combine_with(const ProcResults& append) {
+        data.insert(data.end(), append.data.begin(), append.data.end());
+    }
+    // Largest sample, or 0 if there are none: max_element() would return end() for an
+    // empty range, and dereferencing that is undefined behaviour.
+    uint64_t worst() const {
+        return data.empty() ? 0 : *max_element(data.begin(), data.end());
+    }
+    // Sorts `data` in place, then prints average/worst/best and the exact percentiles.
+    void dump() {
+        if (data.empty()) {
+            // This avoids index-out-of-bounds below.
+            cout << "error: no data\n" << endl;
+            return;
+        }
+        // Once sorted, the samples above warn_latency are exactly those past upper_bound().
+        sort(data.begin(), data.end());
+        auto first_long = upper_bound(data.begin(), data.end(), warn_latency);
+        size_t num_long_transactions = static_cast<size_t>(data.end() - first_long);
+        if (num_long_transactions > 0) {
+            // Scale by 100: the message reports a percentage, not a fraction.
+            cout << 100.0 * num_long_transactions / data.size() << "% of transactions took longer "
+                "than estimated max latency. Consider setting -m to be higher than "
+                << data.back() / 1000 << " microseconds" << endl;
+        }
+
+        uint64_t total_time = 0;
+        for (uint64_t elem : data) {
+            total_time += elem;
+        }
+
+        // Dividing by 1.0E6 converts nanoseconds to milliseconds.
+        double fastest = data.front() / 1.0E6;
+        double slowest = data.back() / 1.0E6;
+        double mean = static_cast<double>(total_time) / data.size() / 1.0E6;
+        cout << "average:" << mean << "ms worst:" << slowest << "ms best:" << fastest << "ms" << endl;
+
+        // (p * size) / 100 is below size for every p < 100, so each index is in range.
+        cout << "50%: " << data[(50 * data.size()) / 100] / 1.0E6 << " ";
+        cout << "90%: " << data[(90 * data.size()) / 100] / 1.0E6 << " ";
+        cout << "95%: " << data[(95 * data.size()) / 100] / 1.0E6 << " ";
+        cout << "99%: " << data[(99 * data.size()) / 100] / 1.0E6 << endl;
+    }
+};
+
 class Pipe {
     int m_readFd;
     int m_writeFd;
@@ -79,13 +136,37 @@ public:
         int error = read(m_readFd, &val, sizeof(val));
         ASSERT_TRUE(error >= 0);
     }
-    template <typename T> void send(const T& v) {
-        int error = write(m_writeFd, &v, sizeof(T));
+    // Sends `v` as a size_t element count followed by the raw uint64_t samples.
+    void send(const ProcResults& v) {
+        size_t num_elems = v.data.size();
+        ssize_t error = write(m_writeFd, &num_elems, sizeof(size_t));
+        ASSERT_TRUE(error >= 0);
+
+        const char* to_write = reinterpret_cast<const char*>(v.data.data());
+        size_t num_bytes = sizeof(uint64_t) * num_elems;
+        // write() may accept only part of the buffer, so continue from where it stopped.
+        while (num_bytes > 0) {
+            ssize_t ret = write(m_writeFd, to_write, num_bytes);
+            ASSERT_TRUE(ret >= 0);
+            num_bytes -= ret;
+            to_write += ret;
+        }
+    }
-    template <typename T> void recv(T& v) {
-        int error = read(m_readFd, &v, sizeof(T));
+    // Receives a ProcResults written by send(): a size_t count, then that many samples.
+    void recv(ProcResults& v) {
+        size_t num_elems = 0;
+        ssize_t error = read(m_readFd, &num_elems, sizeof(size_t));
+        ASSERT_TRUE(error >= 0);
+        v.data.resize(num_elems);
+        char* read_to = reinterpret_cast<char*>(v.data.data());
+        size_t num_bytes = sizeof(uint64_t) * num_elems;
+        while (num_bytes > 0) {
+            ssize_t ret = read(m_readFd, read_to, num_bytes);
+            // read() returns 0 at end-of-file; accepting it would make this loop spin forever.
+            ASSERT_TRUE(ret > 0);
+            num_bytes -= ret;
+            read_to += ret;
+        }
+    }
     static tuple<Pipe, Pipe> createPipePair() {
         int a[2];
@@ -100,74 +181,6 @@ public:
     }
 };
 
-static const uint32_t num_buckets = 128;
-static uint64_t max_time_bucket = 50ull * 1000000;
-static uint64_t time_per_bucket = max_time_bucket / num_buckets;
-
-struct ProcResults {
-    uint64_t m_worst = 0;
-    uint32_t m_buckets[num_buckets] = {0};
-    uint64_t m_transactions = 0;
-    uint64_t m_long_transactions = 0;
-    uint64_t m_total_time = 0;
-    uint64_t m_best = max_time_bucket;
-
-    void add_time(uint64_t time) {
-        if (time > max_time_bucket) {
-            m_long_transactions++;
-        }
-        m_buckets[min((uint32_t)(time / time_per_bucket), num_buckets - 1)] += 1;
-        m_best = min(time, m_best);
-        m_worst = max(time, m_worst);
-        m_transactions += 1;
-        m_total_time += time;
-    }
-    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
-        ProcResults ret;
-        for (int i = 0; i < num_buckets; i++) {
-            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
-        }
-        ret.m_worst = max(a.m_worst, b.m_worst);
-        ret.m_best = min(a.m_best, b.m_best);
-        ret.m_transactions = a.m_transactions + b.m_transactions;
-        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
-        ret.m_total_time = a.m_total_time + b.m_total_time;
-        return ret;
-    }
-    void dump() {
-        if (m_long_transactions > 0) {
-            cout << (double)m_long_transactions / m_transactions << "% of transactions took longer "
-                "than estimated max latency. Consider setting -m to be higher than "
-                 << m_worst / 1000 << " microseconds" << endl;
-        }
-
-        double best = (double)m_best / 1.0E6;
-        double worst = (double)m_worst / 1.0E6;
-        double average = (double)m_total_time / m_transactions / 1.0E6;
-        cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << endl;
-
-        uint64_t cur_total = 0;
-        float time_per_bucket_ms = time_per_bucket / 1.0E6;
-        for (int i = 0; i < num_buckets; i++) {
-            float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
-            if ((cur_total < 0.5f * m_transactions) && (cur_total + m_buckets[i] >= 0.5f * m_transactions)) {
-                cout << "50%: " << cur_time << " ";
-            }
-            if ((cur_total < 0.9f * m_transactions) && (cur_total + m_buckets[i] >= 0.9f * m_transactions)) {
-                cout << "90%: " << cur_time << " ";
-            }
-            if ((cur_total < 0.95f * m_transactions) && (cur_total + m_buckets[i] >= 0.95f * m_transactions)) {
-                cout << "95%: " << cur_time << " ";
-            }
-            if ((cur_total < 0.99f * m_transactions) && (cur_total + m_buckets[i] >= 0.99f * m_transactions)) {
-                cout << "99%: " << cur_time << " ";
-            }
-            cur_total += m_buckets[i];
-        }
-        cout << endl;
-    }
-};
-
 String16 generateServiceName(int num)
 {
     char num_str[32];
@@ -208,7 +221,8 @@ void worker_fx(int num,
     }
 
     // Run the benchmark if client
-    ProcResults results;
+    ProcResults results(iterations);
+
     chrono::time_point<chrono::high_resolution_clock> start, end;
     for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
         Parcel data, reply;
@@ -302,11 +316,10 @@ void run_main(int iterations,
     // Collect all results from the workers.
     cout << "collecting results" << endl;
     signal_all(pipes);
-    ProcResults tot_results;
+    ProcResults tot_results(0), tmp_results(0);
     for (int i = 0; i < workers; i++) {
-        ProcResults tmp_results;
         pipes[i].recv(tmp_results);
-        tot_results = ProcResults::combine(tot_results, tmp_results);
+        tot_results.combine_with(tmp_results);
     }
 
     // Kill all the workers.
@@ -320,13 +333,11 @@ void run_main(int iterations,
         }
     }
     if (training_round) {
-        // sets max_time_bucket to 2 * m_worst from the training round.
-        // Also needs to adjust time_per_bucket accordingly.
-        max_time_bucket = 2 * tot_results.m_worst;
-        time_per_bucket = max_time_bucket / num_buckets;
-        cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
+        // Sets warn_latency to 2 * worst from the training round.
+        warn_latency = 2 * tot_results.worst();
+        cout << "Max latency during training: " << tot_results.worst() / 1.0E6 << "ms" << endl;
     } else {
-            tot_results.dump();
+        tot_results.dump();
     }
 }
 
@@ -403,8 +414,7 @@ int main(int argc, char *argv[])
                 cout << "Max latency -m must be positive." << endl;
                 exit(EXIT_FAILURE);
             }
-            max_time_bucket = max_time_us * 1000ull;
-            time_per_bucket = max_time_bucket / num_buckets;
+            warn_latency = max_time_us * 1000ull;
             i++;
             continue;
         }
author	Alice Ryhl <aliceryhl@google.com>	2023-07-28 15:11:11 +0000
committer	Alice Ryhl <aliceryhl@google.com>	2024-01-09 10:04:32 +0000
commit	4d46cd8051a340474a55053873ba4d9df3725c6b (patch)
tree	643c2d8d09dc20c227846268e8f16b60efe3f284
parent	b39f05a60c4bd0fa3f71999dc259128eeb19fc08 (diff)