#include <binder/Binder.h>
#include <binder/IBinder.h>
#include <binder/IPCThreadState.h>
#include <binder/IServiceManager.h>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <sys/wait.h>
#include <unistd.h>

using namespace std;
using namespace android;
| |
| enum BinderWorkerServiceCode { |
| BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION, |
| }; |
| |
// Fatal assertion helper. `cond` is evaluated exactly once; when it is false
// the enclosing function name, the line number and the text of the failed
// expression are written to stderr and the process exits with EXIT_FAILURE.
#define ASSERT_TRUE(cond) \
    do { \
        if (cond) { \
            break; \
        } \
        std::cerr << __func__ << ":" << __LINE__ << " condition:" << #cond << " failed\n" << std::endl; \
        std::exit(EXIT_FAILURE); \
    } while (0)
| |
| class BinderWorkerService : public BBinder |
| { |
| public: |
| BinderWorkerService() {} |
| ~BinderWorkerService() {} |
| virtual status_t onTransact(uint32_t code, |
| const Parcel& data, Parcel* reply, |
| uint32_t flags = 0) { |
| (void)flags; |
| (void)data; |
| (void)reply; |
| switch (code) { |
| case BINDER_NOP: |
| return NO_ERROR; |
| default: |
| return UNKNOWN_TRANSACTION; |
| }; |
| } |
| }; |
| |
| class Pipe { |
| int m_readFd; |
| int m_writeFd; |
| Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {} |
| Pipe(const Pipe &) = delete; |
| Pipe& operator=(const Pipe &) = delete; |
| Pipe& operator=(const Pipe &&) = delete; |
| public: |
| Pipe(Pipe&& rval) noexcept { |
| m_readFd = rval.m_readFd; |
| m_writeFd = rval.m_writeFd; |
| rval.m_readFd = 0; |
| rval.m_writeFd = 0; |
| } |
| ~Pipe() { |
| if (m_readFd) |
| close(m_readFd); |
| if (m_writeFd) |
| close(m_writeFd); |
| } |
| void signal() { |
| bool val = true; |
| int error = write(m_writeFd, &val, sizeof(val)); |
| ASSERT_TRUE(error >= 0); |
| }; |
| void wait() { |
| bool val = false; |
| int error = read(m_readFd, &val, sizeof(val)); |
| ASSERT_TRUE(error >= 0); |
| } |
| template <typename T> void send(const T& v) { |
| int error = write(m_writeFd, &v, sizeof(T)); |
| ASSERT_TRUE(error >= 0); |
| } |
| template <typename T> void recv(T& v) { |
| int error = read(m_readFd, &v, sizeof(T)); |
| ASSERT_TRUE(error >= 0); |
| } |
| static tuple<Pipe, Pipe> createPipePair() { |
| int a[2]; |
| int b[2]; |
| |
| int error1 = pipe(a); |
| int error2 = pipe(b); |
| ASSERT_TRUE(error1 >= 0); |
| ASSERT_TRUE(error2 >= 0); |
| |
| return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1])); |
| } |
| }; |
| |
// Latency histogram configuration shared by every ProcResults instance.
// Times are in nanoseconds. max_time_bucket and time_per_bucket are mutable
// because the -m option and the training round retune them.
static const uint32_t num_buckets = 128;
static uint64_t max_time_bucket = 50ull * 1000000;
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

// Latency statistics collected by one process, in nanoseconds. The struct
// holds only plain scalar data because workers ship it to the master as raw
// bytes through Pipe::send / Pipe::recv.
struct ProcResults {
    uint64_t m_worst = 0;
    uint32_t m_buckets[num_buckets] = {0};
    uint64_t m_transactions = 0;
    uint64_t m_long_transactions = 0;
    uint64_t m_total_time = 0;
    // Starts at the largest representable value so the first sample always
    // becomes the best time, even when it exceeds max_time_bucket.
    uint64_t m_best = std::numeric_limits<uint64_t>::max();

    // Record one transaction that took `time` nanoseconds.
    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        // A bucket width of 0 (max_time_bucket smaller than num_buckets)
        // would divide by zero, so treat it as 1. Clamp in 64 bits before
        // indexing so a huge quotient cannot wrap into a low bucket.
        const uint64_t width = std::max<uint64_t>(time_per_bucket, 1);
        const uint64_t index = std::min<uint64_t>(time / width, num_buckets - 1);
        m_buckets[index] += 1;
        m_best = std::min(time, m_best);
        m_worst = std::max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }

    // Return the merged statistics of `a` and `b`: counters and histogram
    // buckets are summed, best/worst take the extreme of the two inputs.
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = std::max(a.m_worst, b.m_worst);
        ret.m_best = std::min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }

    // Print average/worst/best latency in milliseconds followed by the
    // histogram-derived 50/90/95/99th percentile estimates to stdout.
    void dump() {
        if (m_transactions == 0) {
            // Nothing was measured; every ratio below would be 0/0.
            std::cout << "no transactions recorded" << std::endl;
            return;
        }

        if (m_long_transactions > 0) {
            // Scale the fraction by 100 so the number matches the "%" label.
            std::cout << 100.0 * m_long_transactions / m_transactions << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                 << m_worst / 1000 << " microseconds" << std::endl;
        }

        const double best = static_cast<double>(m_best) / 1.0E6;
        const double worst = static_cast<double>(m_worst) / 1.0E6;
        const double average = static_cast<double>(m_total_time) / m_transactions / 1.0E6;
        std::cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << std::endl;

        const struct {
            float fraction;
            const char* label;
        } percentiles[] = {
            {0.5f, "50%: "},
            {0.9f, "90%: "},
            {0.95f, "95%: "},
            {0.99f, "99%: "},
        };

        uint64_t cur_total = 0;
        const float time_per_bucket_ms = time_per_bucket / 1.0E6;
        for (uint32_t i = 0; i < num_buckets; i++) {
            // Each percentile is reported as the midpoint of the bucket in
            // which the running sample count crosses its threshold.
            const float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
            for (const auto& pct : percentiles) {
                const float threshold = pct.fraction * m_transactions;
                if ((cur_total < threshold) && (cur_total + m_buckets[i] >= threshold)) {
                    std::cout << pct.label << cur_time << " ";
                }
            }
            cur_total += m_buckets[i];
        }
        std::cout << std::endl;
    }
};
| |
| String16 generateServiceName(int num) |
| { |
| char num_str[32]; |
| snprintf(num_str, sizeof(num_str), "%d", num); |
| String16 serviceName = String16("binderWorker") + String16(num_str); |
| return serviceName; |
| } |
| |
| void worker_fx(int num, |
| int worker_count, |
| int iterations, |
| int payload_size, |
| bool cs_pair, |
| Pipe p) |
| { |
| // Create BinderWorkerService and for go. |
| ProcessState::self()->startThreadPool(); |
| sp<IServiceManager> serviceMgr = defaultServiceManager(); |
| sp<BinderWorkerService> service = new BinderWorkerService; |
| serviceMgr->addService(generateServiceName(num), service); |
| |
| srand(num); |
| p.signal(); |
| p.wait(); |
| |
| // If client/server pairs, then half the workers are |
| // servers and half are clients |
| int server_count = cs_pair ? worker_count / 2 : worker_count; |
| |
| // Get references to other binder services. |
| cout << "Created BinderWorker" << num << endl; |
| (void)worker_count; |
| vector<sp<IBinder> > workers; |
| for (int i = 0; i < server_count; i++) { |
| if (num == i) |
| continue; |
| #pragma clang diagnostic push |
| #pragma clang diagnostic ignored "-Wdeprecated-declarations" |
| workers.push_back(serviceMgr->getService(generateServiceName(i))); |
| #pragma clang diagnostic pop |
| } |
| |
| // Run the benchmark if client |
| ProcResults results; |
| chrono::time_point<chrono::high_resolution_clock> start, end; |
| for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) { |
| Parcel data, reply; |
| int target = cs_pair ? num % server_count : rand() % workers.size(); |
| int sz = payload_size; |
| |
| while (sz >= sizeof(uint32_t)) { |
| data.writeInt32(0); |
| sz -= sizeof(uint32_t); |
| } |
| start = chrono::high_resolution_clock::now(); |
| status_t ret = workers[target]->transact(BINDER_NOP, data, &reply); |
| end = chrono::high_resolution_clock::now(); |
| |
| uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count()); |
| results.add_time(cur_time); |
| |
| if (ret != NO_ERROR) { |
| cout << "thread " << num << " failed " << ret << "i : " << i << endl; |
| exit(EXIT_FAILURE); |
| } |
| } |
| |
| // Signal completion to master and wait. |
| p.signal(); |
| p.wait(); |
| |
| // Send results to master and wait for go to exit. |
| p.send(results); |
| p.wait(); |
| |
| exit(EXIT_SUCCESS); |
| } |
| |
| Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair) |
| { |
| auto pipe_pair = Pipe::createPipePair(); |
| pid_t pid = fork(); |
| if (pid) { |
| /* parent */ |
| return std::move(get<0>(pipe_pair)); |
| } else { |
| /* child */ |
| worker_fx(num, worker_count, iterations, payload_size, cs_pair, |
| std::move(get<1>(pipe_pair))); |
| /* never get here */ |
| return std::move(get<0>(pipe_pair)); |
| } |
| |
| } |
| |
| void wait_all(vector<Pipe>& v) |
| { |
| for (int i = 0; i < v.size(); i++) { |
| v[i].wait(); |
| } |
| } |
| |
| void signal_all(vector<Pipe>& v) |
| { |
| for (int i = 0; i < v.size(); i++) { |
| v[i].signal(); |
| } |
| } |
| |
| void run_main(int iterations, |
| int workers, |
| int payload_size, |
| int cs_pair, |
| bool training_round=false) |
| { |
| vector<Pipe> pipes; |
| // Create all the workers and wait for them to spawn. |
| for (int i = 0; i < workers; i++) { |
| pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair)); |
| } |
| wait_all(pipes); |
| |
| // Run the workers and wait for completion. |
| chrono::time_point<chrono::high_resolution_clock> start, end; |
| cout << "waiting for workers to complete" << endl; |
| start = chrono::high_resolution_clock::now(); |
| signal_all(pipes); |
| wait_all(pipes); |
| end = chrono::high_resolution_clock::now(); |
| |
| // Calculate overall throughput. |
| double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9); |
| cout << "iterations per sec: " << iterations_per_sec << endl; |
| |
| // Collect all results from the workers. |
| cout << "collecting results" << endl; |
| signal_all(pipes); |
| ProcResults tot_results; |
| for (int i = 0; i < workers; i++) { |
| ProcResults tmp_results; |
| pipes[i].recv(tmp_results); |
| tot_results = ProcResults::combine(tot_results, tmp_results); |
| } |
| |
| // Kill all the workers. |
| cout << "killing workers" << endl; |
| signal_all(pipes); |
| for (int i = 0; i < workers; i++) { |
| int status; |
| wait(&status); |
| if (status != 0) { |
| cout << "nonzero child status" << status << endl; |
| } |
| } |
| if (training_round) { |
| // sets max_time_bucket to 2 * m_worst from the training round. |
| // Also needs to adjust time_per_bucket accordingly. |
| max_time_bucket = 2 * tot_results.m_worst; |
| time_per_bucket = max_time_bucket / num_buckets; |
| cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl; |
| } else { |
| tot_results.dump(); |
| } |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| int workers = 2; |
| int iterations = 10000; |
| int payload_size = 0; |
| bool cs_pair = false; |
| bool training_round = false; |
| (void)argc; |
| (void)argv; |
| |
| // Parse arguments. |
| for (int i = 1; i < argc; i++) { |
| if (string(argv[i]) == "--help") { |
| cout << "Usage: binderThroughputTest [OPTIONS]" << endl; |
| cout << "\t-i N : Specify number of iterations." << endl; |
| cout << "\t-m N : Specify expected max latency in microseconds." << endl; |
| cout << "\t-p : Split workers into client/server pairs." << endl; |
| cout << "\t-s N : Specify payload size." << endl; |
| cout << "\t-t N : Run training round." << endl; |
| cout << "\t-w N : Specify total number of workers." << endl; |
| return 0; |
| } |
| if (string(argv[i]) == "-w") { |
| workers = atoi(argv[i+1]); |
| i++; |
| continue; |
| } |
| if (string(argv[i]) == "-i") { |
| iterations = atoi(argv[i+1]); |
| i++; |
| continue; |
| } |
| if (string(argv[i]) == "-s") { |
| payload_size = atoi(argv[i+1]); |
| i++; |
| } |
| if (string(argv[i]) == "-p") { |
| // client/server pairs instead of spreading |
| // requests to all workers. If true, half |
| // the workers become clients and half servers |
| cs_pair = true; |
| } |
| if (string(argv[i]) == "-t") { |
| // Run one training round before actually collecting data |
| // to get an approximation of max latency. |
| training_round = true; |
| } |
| if (string(argv[i]) == "-m") { |
| // Caller specified the max latency in microseconds. |
| // No need to run training round in this case. |
| if (atoi(argv[i+1]) > 0) { |
| max_time_bucket = strtoull(argv[i+1], (char **)nullptr, 10) * 1000; |
| time_per_bucket = max_time_bucket / num_buckets; |
| i++; |
| } else { |
| cout << "Max latency -m must be positive." << endl; |
| exit(EXIT_FAILURE); |
| } |
| } |
| } |
| |
| if (training_round) { |
| cout << "Start training round" << endl; |
| run_main(iterations, workers, payload_size, cs_pair, training_round=true); |
| cout << "Completed training round" << endl << endl; |
| } |
| |
| run_main(iterations, workers, payload_size, cs_pair); |
| return 0; |
| } |