diff options
author | 2022-09-13 14:14:16 +0100 | |
---|---|---|
committer | 2022-09-16 20:14:46 +0000 | |
commit | 1a8cc460546a3bf5f55acf46efb5073d5ed412d3 (patch) | |
tree | 24c2ffff1efae12e9f6f10c5a8df74b8ff7a2e0e | |
parent | 4c9498e8f1c8ef8c54cfd7462e24a505690cdc10 (diff) |
odrefresh: Report dex2oat result for each stage.
This is the first step to improve odrefresh metrics collection. More CLs
will follow.
After the change, the metrics should tell:
- whether dex2oat is invoked or not
- whether dex2oat crashed or exited with a code
Partially cherry-picked from
commit fa152ba922a6301a10671d4684b75fe8cb2a2330
Bug: 246534524
Test: m test-art-host-gtest-art_runtime_tests
Test: atest ArtGtestsTargetChroot:OdRefreshTest
Merged-In: Id962a6e6a765371d290a836c51059d725c9250f3
Change-Id: I69ddf64552d4f2e51e111a40871890d41c88721d
-rw-r--r-- | odrefresh/Android.bp | 1 | ||||
-rw-r--r-- | odrefresh/odr_metrics.cc | 19 | ||||
-rw-r--r-- | odrefresh/odr_metrics.h | 22 | ||||
-rw-r--r-- | odrefresh/odrefresh.cc | 24 | ||||
-rw-r--r-- | odrefresh/odrefresh_test.cc | 9 | ||||
-rw-r--r-- | runtime/exec_utils.cc | 99 | ||||
-rw-r--r-- | runtime/exec_utils.h | 45 | ||||
-rw-r--r-- | runtime/exec_utils_test.cc | 52 |
8 files changed, 159 insertions, 112 deletions
diff --git a/odrefresh/Android.bp b/odrefresh/Android.bp index 55a1fdea2c..9958e3a797 100644 --- a/odrefresh/Android.bp +++ b/odrefresh/Android.bp @@ -154,6 +154,7 @@ cc_library_static { host_supported: true, export_include_dirs: ["include"], local_include_dirs: ["include"], + header_libs: ["libart_headers"], shared_libs: ["libartbase"], target: { android: { diff --git a/odrefresh/odr_metrics.cc b/odrefresh/odr_metrics.cc index a2ce51f65b..57a0241609 100644 --- a/odrefresh/odr_metrics.cc +++ b/odrefresh/odr_metrics.cc @@ -79,7 +79,26 @@ void OdrMetrics::SetCompilationTime(int32_t millis) { case Stage::kComplete: case Stage::kPreparation: case Stage::kUnknown: + LOG(FATAL) << "Unexpected stage " << stage_ << " when setting compilation time"; + } +} + +void OdrMetrics::SetDex2OatResult(const ExecResult& dex2oat_result) { + switch (stage_) { + case Stage::kPrimaryBootClasspath: + primary_bcp_dex2oat_result_ = dex2oat_result; + break; + case Stage::kSecondaryBootClasspath: + secondary_bcp_dex2oat_result_ = dex2oat_result; break; + case Stage::kSystemServerClasspath: + system_server_dex2oat_result_ = dex2oat_result; + break; + case Stage::kCheck: + case Stage::kComplete: + case Stage::kPreparation: + case Stage::kUnknown: + LOG(FATAL) << "Unexpected stage " << stage_ << " when setting dex2oat result"; } } diff --git a/odrefresh/odr_metrics.h b/odrefresh/odr_metrics.h index 7ebc148684..afe3efd348 100644 --- a/odrefresh/odr_metrics.h +++ b/odrefresh/odr_metrics.h @@ -24,6 +24,7 @@ #include <string> #include "base/macros.h" +#include "exec_utils.h" #include "odr_metrics_record.h" namespace art { @@ -115,6 +116,9 @@ class OdrMetrics final { // Sets the current odrefresh processing stage. void SetStage(Stage stage); + // Sets the result of the current dex2oat invocation. + void SetDex2OatResult(const ExecResult& dex2oat_result); + // Record metrics into an OdrMetricsRecord. // returns true on success, false if instance is not valid (because the trigger value is not set). bool ToRecord(/*out*/OdrMetricsRecord* record) const; @@ -139,10 +143,28 @@ class OdrMetrics final { int32_t cache_space_free_start_mib_ = 0; int32_t cache_space_free_end_mib_ = 0; + + // The total time spent on compiling primary BCP. int32_t primary_bcp_compilation_millis_ = 0; + + // The result of the dex2oat invocation for compiling primary BCP, or `std::nullopt` if dex2oat is + // not invoked. + std::optional<ExecResult> primary_bcp_dex2oat_result_; + + // The total time spent on compiling secondary BCP. int32_t secondary_bcp_compilation_millis_ = 0; + + // The result of the dex2oat invocation for compiling secondary BCP, or `std::nullopt` if dex2oat + // is not invoked. + std::optional<ExecResult> secondary_bcp_dex2oat_result_; + + // The total time spent on compiling system server. int32_t system_server_compilation_millis_ = 0; + // The result of the last dex2oat invocation for compiling system server, or `std::nullopt` if + // dex2oat is not invoked. + std::optional<ExecResult> system_server_dex2oat_result_; + friend class ScopedOdrCompilationTimer; }; diff --git a/odrefresh/odrefresh.cc b/odrefresh/odrefresh.cc index 3994f385ed..bed694cc5e 100644 --- a/odrefresh/odrefresh.cc +++ b/odrefresh/odrefresh.cc @@ -1549,15 +1549,11 @@ WARN_UNUSED bool OnDeviceRefresh::CompileBootClasspathArtifacts( return true; } - bool timed_out = false; - int dex2oat_exit_code = exec_utils_->ExecAndReturnCode(args, timeout, &timed_out, error_msg); + ExecResult dex2oat_result = exec_utils_->ExecAndReturnResult(args, timeout, error_msg); + metrics.SetDex2OatResult(dex2oat_result); - if (dex2oat_exit_code != 0) { - if (timed_out) { - metrics.SetStatus(OdrMetrics::Status::kTimeLimitExceeded); - } else { - metrics.SetStatus(OdrMetrics::Status::kDex2OatError); - } + if (dex2oat_result.status != ExecResult::kExited || dex2oat_result.exit_code != 0) { + metrics.SetStatus(OdrMetrics::Status::kDex2OatError); EraseFiles(staging_files); return false; } @@ -1708,15 +1704,11 @@ WARN_UNUSED bool OnDeviceRefresh::CompileSystemServerArtifacts( return true; } - bool timed_out = false; - int dex2oat_exit_code = exec_utils_->ExecAndReturnCode(args, timeout, &timed_out, error_msg); + ExecResult dex2oat_result = exec_utils_->ExecAndReturnResult(args, timeout, error_msg); + metrics.SetDex2OatResult(dex2oat_result); - if (dex2oat_exit_code != 0) { - if (timed_out) { - metrics.SetStatus(OdrMetrics::Status::kTimeLimitExceeded); - } else { - metrics.SetStatus(OdrMetrics::Status::kDex2OatError); - } + if (dex2oat_result.status != ExecResult::kExited || dex2oat_result.exit_code != 0) { + metrics.SetStatus(OdrMetrics::Status::kDex2OatError); EraseFiles(staging_files); return false; } diff --git a/odrefresh/odrefresh_test.cc b/odrefresh/odrefresh_test.cc index d28c0d8942..cb6bee0a88 100644 --- a/odrefresh/odrefresh_test.cc +++ b/odrefresh/odrefresh_test.cc @@ -71,11 +71,10 @@ class MockExecUtils : public ExecUtils { public: // A workaround to avoid MOCK_METHOD on a method with an `std::string*` parameter, which will lead // to a conflict between gmock and android-base/logging.h (b/132668253). - int ExecAndReturnCode(const std::vector<std::string>& arg_vector, - int, - bool*, - std::string*) const override { - return DoExecAndReturnCode(arg_vector); + ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector, + int, + std::string*) const override { + return {.status = ExecResult::kExited, .exit_code = DoExecAndReturnCode(arg_vector)}; } MOCK_METHOD(int, DoExecAndReturnCode, (const std::vector<std::string>& arg_vector), (const)); diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc index 929f09f784..dd389f8c97 100644 --- a/runtime/exec_utils.cc +++ b/runtime/exec_utils.cc @@ -102,10 +102,10 @@ pid_t ExecWithoutWait(const std::vector<std::string>& arg_vector, std::string* e } } -int WaitChild(pid_t pid, - const std::vector<std::string>& arg_vector, - bool no_wait, - std::string* error_msg) { +ExecResult WaitChild(pid_t pid, + const std::vector<std::string>& arg_vector, + bool no_wait, + std::string* error_msg) { siginfo_t info; // WNOWAIT leaves the child in a waitable state. The call is still blocking. int options = WEXITED | (no_wait ? WNOWAIT : 0); @@ -114,7 +114,7 @@ int WaitChild(pid_t pid, ToCommandLine(arg_vector).c_str(), pid, strerror(errno)); - return -1; + return {.status = ExecResult::kUnknown}; } if (info.si_pid != pid) { *error_msg = StringPrintf("waitid failed for (%s): wanted pid %d, got %d: %s", @@ -122,38 +122,38 @@ int WaitChild(pid_t pid, pid, info.si_pid, strerror(errno)); - return -1; + return {.status = ExecResult::kUnknown}; } if (info.si_code != CLD_EXITED) { *error_msg = StringPrintf("Failed to execute (%s) because the child process is terminated by signal %d", ToCommandLine(arg_vector).c_str(), info.si_status); - return -1; + return {.status = ExecResult::kSignaled, .signal = info.si_status}; } - return info.si_status; + return {.status = ExecResult::kExited, .exit_code = info.si_status}; } // A fallback implementation of `WaitChildWithTimeout` that creates a thread to wait instead of // relying on `pidfd_open`. -int WaitChildWithTimeoutFallback(pid_t pid, - const std::vector<std::string>& arg_vector, - int timeout_ms, - bool* timed_out, - std::string* error_msg) { +ExecResult WaitChildWithTimeoutFallback(pid_t pid, + const std::vector<std::string>& arg_vector, + int timeout_ms, + std::string* error_msg) { bool child_exited = false; + bool timed_out = false; std::condition_variable cv; std::mutex m; std::thread wait_thread([&]() { std::unique_lock<std::mutex> lock(m); if (!cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&] { return child_exited; })) { - *timed_out = true; + timed_out = true; kill(pid, SIGKILL); } }); - int status = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg); + ExecResult result = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg); { std::unique_lock<std::mutex> lock(m); @@ -163,24 +163,23 @@ int WaitChildWithTimeoutFallback(pid_t pid, wait_thread.join(); // The timeout error should have a higher priority than any other error. - if (*timed_out) { + if (timed_out) { *error_msg = StringPrintf("Failed to execute (%s) because the child process timed out after %dms", ToCommandLine(arg_vector).c_str(), timeout_ms); - return -1; + return ExecResult{.status = ExecResult::kTimedOut}; } - return status; + return result; } // Waits for the child process to finish and leaves the child in a waitable state. -int WaitChildWithTimeout(pid_t pid, - unique_fd pidfd, - const std::vector<std::string>& arg_vector, - int timeout_ms, - bool* timed_out, - std::string* error_msg) { +ExecResult WaitChildWithTimeout(pid_t pid, + unique_fd pidfd, + const std::vector<std::string>& arg_vector, + int timeout_ms, + std::string* error_msg) { auto cleanup = android::base::make_scope_guard([&]() { kill(pid, SIGKILL); std::string ignored_error_msg; @@ -196,15 +195,14 @@ int WaitChildWithTimeout(pid_t pid, if (poll_ret < 0) { *error_msg = StringPrintf("poll failed for pid %d: %s", pid, strerror(errno)); - return -1; + return {.status = ExecResult::kUnknown}; } if (poll_ret == 0) { - *timed_out = true; *error_msg = StringPrintf("Failed to execute (%s) because the child process timed out after %dms", ToCommandLine(arg_vector).c_str(), timeout_ms); - return -1; + return {.status = ExecResult::kTimedOut}; } cleanup.Disable(); @@ -242,54 +240,47 @@ bool ParseProcStat(const std::string& stat_content, int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector, std::string* error_msg) const { - bool ignored_timed_out; - return ExecAndReturnCode(arg_vector, /*timeout_sec=*/-1, &ignored_timed_out, error_msg); + return ExecAndReturnResult(arg_vector, /*timeout_sec=*/-1, error_msg).exit_code; } -int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector, - int timeout_sec, - bool* timed_out, - std::string* error_msg) const { - return ExecAndReturnCode( - arg_vector, timeout_sec, ExecCallbacks(), timed_out, /*stat=*/nullptr, error_msg); +ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector, + int timeout_sec, + std::string* error_msg) const { + return ExecAndReturnResult(arg_vector, timeout_sec, ExecCallbacks(), /*stat=*/nullptr, error_msg); } -int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector, - int timeout_sec, - const ExecCallbacks& callbacks, - /*out*/ bool* timed_out, - /*out*/ ProcessStat* stat, - /*out*/ std::string* error_msg) const { - *timed_out = false; - +ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector, + int timeout_sec, + const ExecCallbacks& callbacks, + /*out*/ ProcessStat* stat, + /*out*/ std::string* error_msg) const { if (timeout_sec > INT_MAX / 1000) { *error_msg = "Timeout too large"; - return -1; + return {.status = ExecResult::kStartFailed}; } // Start subprocess. pid_t pid = ExecWithoutWait(arg_vector, error_msg); if (pid == -1) { - return -1; + return {.status = ExecResult::kStartFailed}; } callbacks.on_start(pid); // Wait for subprocess to finish. - int status; + ExecResult result; if (timeout_sec >= 0) { unique_fd pidfd = PidfdOpen(pid); if (pidfd.get() >= 0) { - status = WaitChildWithTimeout( - pid, std::move(pidfd), arg_vector, timeout_sec * 1000, timed_out, error_msg); + result = + WaitChildWithTimeout(pid, std::move(pidfd), arg_vector, timeout_sec * 1000, error_msg); } else { LOG(DEBUG) << StringPrintf( "pidfd_open failed for pid %d: %s, falling back", pid, strerror(errno)); - status = - WaitChildWithTimeoutFallback(pid, arg_vector, timeout_sec * 1000, timed_out, error_msg); + result = WaitChildWithTimeoutFallback(pid, arg_vector, timeout_sec * 1000, error_msg); } } else { - status = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg); + result = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg); } if (stat != nullptr) { @@ -303,12 +294,12 @@ int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector, std::string local_error_msg; // TODO(jiakaiz): Use better logic to detect waitid failure. - if (WaitChild(pid, arg_vector, /*no_wait=*/false, &local_error_msg) < 0 && - local_error_msg.find("waitid failed") == 0) { + if (WaitChild(pid, arg_vector, /*no_wait=*/false, &local_error_msg).status == + ExecResult::kUnknown) { LOG(ERROR) << "Failed to clean up child process '" << arg_vector[0] << "': " << local_error_msg; } - return status; + return result; } bool ExecUtils::Exec(const std::vector<std::string>& arg_vector, std::string* error_msg) const { diff --git a/runtime/exec_utils.h b/runtime/exec_utils.h index aa0d0fc932..c280e3e26f 100644 --- a/runtime/exec_utils.h +++ b/runtime/exec_utils.h @@ -43,6 +43,29 @@ struct ExecCallbacks { std::function<void(pid_t pid)> on_end = [](pid_t) {}; }; +struct ExecResult { + enum Status { + // Unable to get the status. + kUnknown = 0, + // Process exited normally with an exit code. + kExited = 1, + // Process terminated by a signal. + kSignaled = 2, + // Process timed out and killed. + kTimedOut = 3, + // Failed to start the process. + kStartFailed = 4, + }; + + Status status = kUnknown; + + // The process exit code, if `status` is `kExited`, or -1. + int exit_code = -1; + + // The signal that terminated the process, if `status` is `kSignaled`, or 0. + int signal = 0; +}; + // Wrapper on fork/execv to run a command in a subprocess. // These spawn child processes using the environment as it was set when the single instance // of the runtime (Runtime::Current()) was started. If no instance of the runtime was started, it @@ -59,21 +82,19 @@ class ExecUtils { // Executes the command specified in `arg_vector` in a subprocess with a timeout. // If `timeout_sec` is negative, blocks until the subprocess exits. - // Returns the process exit code on success, -1 otherwise. - // Sets `timed_out` to true if the process times out, or false otherwise. - virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector, - int timeout_sec, - /*out*/ bool* timed_out, - /*out*/ std::string* error_msg) const; + // Returns a structured result. If the status is not `kExited`, also returns a non-empty + // `error_msg`. + virtual ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector, + int timeout_sec, + /*out*/ std::string* error_msg) const; // Same as above, but also collects stat of the process and calls callbacks. The stat is collected // no matter the child process succeeds or not. - virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector, - int timeout_sec, - const ExecCallbacks& callbacks, - /*out*/ bool* timed_out, - /*out*/ ProcessStat* stat, - /*out*/ std::string* error_msg) const; + virtual ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector, + int timeout_sec, + const ExecCallbacks& callbacks, + /*out*/ ProcessStat* stat, + /*out*/ std::string* error_msg) const; protected: virtual android::base::unique_fd PidfdOpen(pid_t pid) const; diff --git a/runtime/exec_utils_test.cc b/runtime/exec_utils_test.cc index e9bbd8fd2b..e89180b572 100644 --- a/runtime/exec_utils_test.cc +++ b/runtime/exec_utils_test.cc @@ -18,6 +18,7 @@ #include <sys/utsname.h> +#include <csignal> #include <cstring> #include <filesystem> #include <memory> @@ -120,7 +121,9 @@ TEST_P(ExecUtilsTest, ExecError) { std::string error_msg; // Historical note: Running on Valgrind failed due to some memory // that leaks in thread alternate signal stacks. - EXPECT_FALSE(exec_utils_->Exec(command, &error_msg)); + ExecResult result = exec_utils_->ExecAndReturnResult(command, /*timeout_sec=*/-1, &error_msg); + EXPECT_EQ(result.status, ExecResult::kSignaled); + EXPECT_EQ(result.signal, SIGABRT); EXPECT_FALSE(error_msg.empty()); } @@ -173,9 +176,9 @@ TEST_P(ExecUtilsTest, ExecTimeout) { static constexpr int kWaitSeconds = 1; std::vector<std::string> command = SleepCommand(kSleepSeconds); std::string error_msg; - bool timed_out; - ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), -1); - EXPECT_TRUE(timed_out) << error_msg; + EXPECT_EQ(exec_utils_->ExecAndReturnResult(command, kWaitSeconds, &error_msg).status, + ExecResult::kTimedOut) + << error_msg; EXPECT_THAT(error_msg, HasSubstr("timed out")); } @@ -184,10 +187,9 @@ TEST_P(ExecUtilsTest, ExecNoTimeout) { static constexpr int kWaitSeconds = 5; std::vector<std::string> command = SleepCommand(kSleepSeconds); std::string error_msg; - bool timed_out; - ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), 0) + EXPECT_EQ(exec_utils_->ExecAndReturnResult(command, kWaitSeconds, &error_msg).status, + ExecResult::kExited) << error_msg; - EXPECT_FALSE(timed_out); } TEST_P(ExecUtilsTest, ExecStat) { @@ -195,7 +197,6 @@ TEST_P(ExecUtilsTest, ExecStat) { command.push_back(GetBin("id")); std::string error_msg; - bool timed_out; ProcessStat stat; // The process filename is "a) b". @@ -206,9 +207,11 @@ TEST_P(ExecUtilsTest, ExecStat) { EXPECT_CALL(*exec_utils_, GetUptimeMs()).WillOnce(Return(1620344887ll)); EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100)); - ASSERT_EQ(exec_utils_->ExecAndReturnCode( - command, /*timeout_sec=*/-1, ExecCallbacks(), &timed_out, &stat, &error_msg), - 0) + ASSERT_EQ( + exec_utils_ + ->ExecAndReturnResult(command, /*timeout_sec=*/-1, ExecCallbacks(), &stat, &error_msg) + .status, + ExecResult::kExited) << error_msg; EXPECT_EQ(stat.cpu_time_ms, 990); @@ -219,7 +222,6 @@ TEST_P(ExecUtilsTest, ExecStatFailed) { std::vector<std::string> command = SleepCommand(5); std::string error_msg; - bool timed_out; ProcessStat stat; EXPECT_CALL(*exec_utils_, GetProcStat(_)) @@ -230,8 +232,11 @@ TEST_P(ExecUtilsTest, ExecStatFailed) { EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100)); // This will always time out. - exec_utils_->ExecAndReturnCode( - command, /*timeout_sec=*/1, ExecCallbacks(), &timed_out, &stat, &error_msg); + ASSERT_EQ( + exec_utils_ + ->ExecAndReturnResult(command, /*timeout_sec=*/1, ExecCallbacks(), &stat, &error_msg) + .status, + ExecResult::kTimedOut); EXPECT_EQ(stat.cpu_time_ms, 990); EXPECT_EQ(stat.wall_time_ms, 1007); @@ -251,17 +256,14 @@ TEST_P(ExecUtilsTest, ExecCallbacks) { command.push_back(GetBin("id")); std::string error_msg; - bool timed_out; - - exec_utils_->ExecAndReturnCode(command, - /*timeout_sec=*/-1, - ExecCallbacks{ - .on_start = on_start.AsStdFunction(), - .on_end = on_end.AsStdFunction(), - }, - &timed_out, - /*stat=*/nullptr, - &error_msg); + exec_utils_->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks{ + .on_start = on_start.AsStdFunction(), + .on_end = on_end.AsStdFunction(), + }, + /*stat=*/nullptr, + &error_msg); } INSTANTIATE_TEST_SUITE_P(AlwaysOrNeverFallback, ExecUtilsTest, testing::Values(true, false)); |