odrefresh: Report dex2oat result for each stage.
This is the first step to improve odrefresh metrics collection. More CLs
will follow.
After the change, the metrics should tell:
- whether dex2oat is invoked or not
- whether dex2oat crashed or exited with a code
Bug: 246534524
Test: m test-art-host-gtest-art_runtime_tests
Test: m test-art-host-gtest-art_artd_tests
Test: atest ArtGtestsTargetChroot:OdRefreshTest
Ignore-AOSP-First: Contains internal-only changes. Will cherry-pick the
AOSP part.
Change-Id: Id962a6e6a765371d290a836c51059d725c9250f3
diff --git a/artd/artd.cc b/artd/artd.cc
index 24d5205..db317f9 100644
--- a/artd/artd.cc
+++ b/artd/artd.cc
@@ -871,14 +871,13 @@
android::base::Result<int> Artd::ExecAndReturnCode(const std::vector<std::string>& args,
int timeout_sec,
ProcessStat* stat) const {
- bool ignored_timed_out = false; // This information is encoded in the error message.
std::string error_msg;
- int exit_code = exec_utils_->ExecAndReturnCode(
- args, timeout_sec, ExecCallbacks(), &ignored_timed_out, stat, &error_msg);
- if (exit_code < 0) {
+ ExecResult result =
+ exec_utils_->ExecAndReturnResult(args, timeout_sec, ExecCallbacks(), stat, &error_msg);
+ if (result.status != ExecResult::kExited) {
return Error() << error_msg;
}
- return exit_code;
+ return result.exit_code;
}
} // namespace artd
diff --git a/artd/artd_test.cc b/artd/artd_test.cc
index 3a3b29a..92180b1 100644
--- a/artd/artd_test.cc
+++ b/artd/artd_test.cc
@@ -172,16 +172,19 @@
public:
// A workaround to avoid MOCK_METHOD on a method with an `std::string*` parameter, which will lead
// to a conflict between gmock and android-base/logging.h (b/132668253).
- int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int,
- const ExecCallbacks&,
- bool*,
- ProcessStat* stat,
- std::string*) const override {
- return DoExecAndReturnCode(arg_vector, stat);
+ ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int,
+ const ExecCallbacks&,
+ ProcessStat* stat,
+ std::string*) const override {
+ Result<int> code = DoExecAndReturnCode(arg_vector, stat);
+ if (code.ok()) {
+ return {.status = ExecResult::kExited, .exit_code = code.value()};
+ }
+ return {.status = ExecResult::kUnknown};
}
- MOCK_METHOD(int,
+ MOCK_METHOD(Result<int>,
DoExecAndReturnCode,
(const std::vector<std::string>& arg_vector, ProcessStat* stat),
(const));
diff --git a/odrefresh/Android.bp b/odrefresh/Android.bp
index 55a1fde..9958e3a 100644
--- a/odrefresh/Android.bp
+++ b/odrefresh/Android.bp
@@ -154,6 +154,7 @@
host_supported: true,
export_include_dirs: ["include"],
local_include_dirs: ["include"],
+ header_libs: ["libart_headers"],
shared_libs: ["libartbase"],
target: {
android: {
diff --git a/odrefresh/odr_metrics.cc b/odrefresh/odr_metrics.cc
index a2ce51f..57a0241 100644
--- a/odrefresh/odr_metrics.cc
+++ b/odrefresh/odr_metrics.cc
@@ -79,7 +79,26 @@
case Stage::kComplete:
case Stage::kPreparation:
case Stage::kUnknown:
+ LOG(FATAL) << "Unexpected stage " << stage_ << " when setting compilation time";
+ }
+}
+
+void OdrMetrics::SetDex2OatResult(const ExecResult& dex2oat_result) {
+ switch (stage_) {
+ case Stage::kPrimaryBootClasspath:
+ primary_bcp_dex2oat_result_ = dex2oat_result;
break;
+ case Stage::kSecondaryBootClasspath:
+ secondary_bcp_dex2oat_result_ = dex2oat_result;
+ break;
+ case Stage::kSystemServerClasspath:
+ system_server_dex2oat_result_ = dex2oat_result;
+ break;
+ case Stage::kCheck:
+ case Stage::kComplete:
+ case Stage::kPreparation:
+ case Stage::kUnknown:
+ LOG(FATAL) << "Unexpected stage " << stage_ << " when setting dex2oat result";
}
}
diff --git a/odrefresh/odr_metrics.h b/odrefresh/odr_metrics.h
index 7ebc148..afe3efd 100644
--- a/odrefresh/odr_metrics.h
+++ b/odrefresh/odr_metrics.h
@@ -24,6 +24,7 @@
#include <string>
#include "base/macros.h"
+#include "exec_utils.h"
#include "odr_metrics_record.h"
namespace art {
@@ -115,6 +116,9 @@
// Sets the current odrefresh processing stage.
void SetStage(Stage stage);
+ // Sets the result of the current dex2oat invocation.
+ void SetDex2OatResult(const ExecResult& dex2oat_result);
+
// Record metrics into an OdrMetricsRecord.
// returns true on success, false if instance is not valid (because the trigger value is not set).
bool ToRecord(/*out*/OdrMetricsRecord* record) const;
@@ -139,10 +143,28 @@
int32_t cache_space_free_start_mib_ = 0;
int32_t cache_space_free_end_mib_ = 0;
+
+ // The total time spent on compiling primary BCP.
int32_t primary_bcp_compilation_millis_ = 0;
+
+ // The result of the dex2oat invocation for compiling primary BCP, or `std::nullopt` if dex2oat is
+ // not invoked.
+ std::optional<ExecResult> primary_bcp_dex2oat_result_;
+
+ // The total time spent on compiling secondary BCP.
int32_t secondary_bcp_compilation_millis_ = 0;
+
+ // The result of the dex2oat invocation for compiling secondary BCP, or `std::nullopt` if dex2oat
+ // is not invoked.
+ std::optional<ExecResult> secondary_bcp_dex2oat_result_;
+
+ // The total time spent on compiling system server.
int32_t system_server_compilation_millis_ = 0;
+ // The result of the last dex2oat invocation for compiling system server, or `std::nullopt` if
+ // dex2oat is not invoked.
+ std::optional<ExecResult> system_server_dex2oat_result_;
+
friend class ScopedOdrCompilationTimer;
};
diff --git a/odrefresh/odrefresh.cc b/odrefresh/odrefresh.cc
index ba32b0a..618163a 100644
--- a/odrefresh/odrefresh.cc
+++ b/odrefresh/odrefresh.cc
@@ -1549,15 +1549,11 @@
return true;
}
- bool timed_out = false;
- int dex2oat_exit_code = exec_utils_->ExecAndReturnCode(args, timeout, &timed_out, error_msg);
+ ExecResult dex2oat_result = exec_utils_->ExecAndReturnResult(args, timeout, error_msg);
+ metrics.SetDex2OatResult(dex2oat_result);
- if (dex2oat_exit_code != 0) {
- if (timed_out) {
- metrics.SetStatus(OdrMetrics::Status::kTimeLimitExceeded);
- } else {
- metrics.SetStatus(OdrMetrics::Status::kDex2OatError);
- }
+ if (dex2oat_result.status != ExecResult::kExited || dex2oat_result.exit_code != 0) {
+ metrics.SetStatus(OdrMetrics::Status::kDex2OatError);
EraseFiles(staging_files);
return false;
}
@@ -1708,15 +1704,11 @@
return true;
}
- bool timed_out = false;
- int dex2oat_exit_code = exec_utils_->ExecAndReturnCode(args, timeout, &timed_out, error_msg);
+ ExecResult dex2oat_result = exec_utils_->ExecAndReturnResult(args, timeout, error_msg);
+ metrics.SetDex2OatResult(dex2oat_result);
- if (dex2oat_exit_code != 0) {
- if (timed_out) {
- metrics.SetStatus(OdrMetrics::Status::kTimeLimitExceeded);
- } else {
- metrics.SetStatus(OdrMetrics::Status::kDex2OatError);
- }
+ if (dex2oat_result.status != ExecResult::kExited || dex2oat_result.exit_code != 0) {
+ metrics.SetStatus(OdrMetrics::Status::kDex2OatError);
EraseFiles(staging_files);
return false;
}
diff --git a/odrefresh/odrefresh_test.cc b/odrefresh/odrefresh_test.cc
index d28c0d8..cb6bee0 100644
--- a/odrefresh/odrefresh_test.cc
+++ b/odrefresh/odrefresh_test.cc
@@ -71,11 +71,10 @@
public:
// A workaround to avoid MOCK_METHOD on a method with an `std::string*` parameter, which will lead
// to a conflict between gmock and android-base/logging.h (b/132668253).
- int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int,
- bool*,
- std::string*) const override {
- return DoExecAndReturnCode(arg_vector);
+ ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int,
+ std::string*) const override {
+ return {.status = ExecResult::kExited, .exit_code = DoExecAndReturnCode(arg_vector)};
}
MOCK_METHOD(int, DoExecAndReturnCode, (const std::vector<std::string>& arg_vector), (const));
diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc
index 929f09f..dd389f8 100644
--- a/runtime/exec_utils.cc
+++ b/runtime/exec_utils.cc
@@ -102,10 +102,10 @@
}
}
-int WaitChild(pid_t pid,
- const std::vector<std::string>& arg_vector,
- bool no_wait,
- std::string* error_msg) {
+ExecResult WaitChild(pid_t pid,
+ const std::vector<std::string>& arg_vector,
+ bool no_wait,
+ std::string* error_msg) {
siginfo_t info;
// WNOWAIT leaves the child in a waitable state. The call is still blocking.
int options = WEXITED | (no_wait ? WNOWAIT : 0);
@@ -114,7 +114,7 @@
ToCommandLine(arg_vector).c_str(),
pid,
strerror(errno));
- return -1;
+ return {.status = ExecResult::kUnknown};
}
if (info.si_pid != pid) {
*error_msg = StringPrintf("waitid failed for (%s): wanted pid %d, got %d: %s",
@@ -122,38 +122,38 @@
pid,
info.si_pid,
strerror(errno));
- return -1;
+ return {.status = ExecResult::kUnknown};
}
if (info.si_code != CLD_EXITED) {
*error_msg =
StringPrintf("Failed to execute (%s) because the child process is terminated by signal %d",
ToCommandLine(arg_vector).c_str(),
info.si_status);
- return -1;
+ return {.status = ExecResult::kSignaled, .signal = info.si_status};
}
- return info.si_status;
+ return {.status = ExecResult::kExited, .exit_code = info.si_status};
}
// A fallback implementation of `WaitChildWithTimeout` that creates a thread to wait instead of
// relying on `pidfd_open`.
-int WaitChildWithTimeoutFallback(pid_t pid,
- const std::vector<std::string>& arg_vector,
- int timeout_ms,
- bool* timed_out,
- std::string* error_msg) {
+ExecResult WaitChildWithTimeoutFallback(pid_t pid,
+ const std::vector<std::string>& arg_vector,
+ int timeout_ms,
+ std::string* error_msg) {
bool child_exited = false;
+ bool timed_out = false;
std::condition_variable cv;
std::mutex m;
std::thread wait_thread([&]() {
std::unique_lock<std::mutex> lock(m);
if (!cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&] { return child_exited; })) {
- *timed_out = true;
+ timed_out = true;
kill(pid, SIGKILL);
}
});
- int status = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg);
+ ExecResult result = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg);
{
std::unique_lock<std::mutex> lock(m);
@@ -163,24 +163,23 @@
wait_thread.join();
// The timeout error should have a higher priority than any other error.
- if (*timed_out) {
+ if (timed_out) {
*error_msg =
StringPrintf("Failed to execute (%s) because the child process timed out after %dms",
ToCommandLine(arg_vector).c_str(),
timeout_ms);
- return -1;
+ return ExecResult{.status = ExecResult::kTimedOut};
}
- return status;
+ return result;
}
// Waits for the child process to finish and leaves the child in a waitable state.
-int WaitChildWithTimeout(pid_t pid,
- unique_fd pidfd,
- const std::vector<std::string>& arg_vector,
- int timeout_ms,
- bool* timed_out,
- std::string* error_msg) {
+ExecResult WaitChildWithTimeout(pid_t pid,
+ unique_fd pidfd,
+ const std::vector<std::string>& arg_vector,
+ int timeout_ms,
+ std::string* error_msg) {
auto cleanup = android::base::make_scope_guard([&]() {
kill(pid, SIGKILL);
std::string ignored_error_msg;
@@ -196,15 +195,14 @@
if (poll_ret < 0) {
*error_msg = StringPrintf("poll failed for pid %d: %s", pid, strerror(errno));
- return -1;
+ return {.status = ExecResult::kUnknown};
}
if (poll_ret == 0) {
- *timed_out = true;
*error_msg =
StringPrintf("Failed to execute (%s) because the child process timed out after %dms",
ToCommandLine(arg_vector).c_str(),
timeout_ms);
- return -1;
+ return {.status = ExecResult::kTimedOut};
}
cleanup.Disable();
@@ -242,54 +240,47 @@
int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector,
std::string* error_msg) const {
- bool ignored_timed_out;
- return ExecAndReturnCode(arg_vector, /*timeout_sec=*/-1, &ignored_timed_out, error_msg);
+ return ExecAndReturnResult(arg_vector, /*timeout_sec=*/-1, error_msg).exit_code;
}
-int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int timeout_sec,
- bool* timed_out,
- std::string* error_msg) const {
- return ExecAndReturnCode(
- arg_vector, timeout_sec, ExecCallbacks(), timed_out, /*stat=*/nullptr, error_msg);
+ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int timeout_sec,
+ std::string* error_msg) const {
+ return ExecAndReturnResult(arg_vector, timeout_sec, ExecCallbacks(), /*stat=*/nullptr, error_msg);
}
-int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int timeout_sec,
- const ExecCallbacks& callbacks,
- /*out*/ bool* timed_out,
- /*out*/ ProcessStat* stat,
- /*out*/ std::string* error_msg) const {
- *timed_out = false;
-
+ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int timeout_sec,
+ const ExecCallbacks& callbacks,
+ /*out*/ ProcessStat* stat,
+ /*out*/ std::string* error_msg) const {
if (timeout_sec > INT_MAX / 1000) {
*error_msg = "Timeout too large";
- return -1;
+ return {.status = ExecResult::kStartFailed};
}
// Start subprocess.
pid_t pid = ExecWithoutWait(arg_vector, error_msg);
if (pid == -1) {
- return -1;
+ return {.status = ExecResult::kStartFailed};
}
callbacks.on_start(pid);
// Wait for subprocess to finish.
- int status;
+ ExecResult result;
if (timeout_sec >= 0) {
unique_fd pidfd = PidfdOpen(pid);
if (pidfd.get() >= 0) {
- status = WaitChildWithTimeout(
- pid, std::move(pidfd), arg_vector, timeout_sec * 1000, timed_out, error_msg);
+ result =
+ WaitChildWithTimeout(pid, std::move(pidfd), arg_vector, timeout_sec * 1000, error_msg);
} else {
LOG(DEBUG) << StringPrintf(
"pidfd_open failed for pid %d: %s, falling back", pid, strerror(errno));
- status =
- WaitChildWithTimeoutFallback(pid, arg_vector, timeout_sec * 1000, timed_out, error_msg);
+ result = WaitChildWithTimeoutFallback(pid, arg_vector, timeout_sec * 1000, error_msg);
}
} else {
- status = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg);
+ result = WaitChild(pid, arg_vector, /*no_wait=*/true, error_msg);
}
if (stat != nullptr) {
@@ -303,12 +294,12 @@
std::string local_error_msg;
// TODO(jiakaiz): Use better logic to detect waitid failure.
- if (WaitChild(pid, arg_vector, /*no_wait=*/false, &local_error_msg) < 0 &&
- local_error_msg.find("waitid failed") == 0) {
+ if (WaitChild(pid, arg_vector, /*no_wait=*/false, &local_error_msg).status ==
+ ExecResult::kUnknown) {
LOG(ERROR) << "Failed to clean up child process '" << arg_vector[0] << "': " << local_error_msg;
}
- return status;
+ return result;
}
bool ExecUtils::Exec(const std::vector<std::string>& arg_vector, std::string* error_msg) const {
diff --git a/runtime/exec_utils.h b/runtime/exec_utils.h
index aa0d0fc..c280e3e 100644
--- a/runtime/exec_utils.h
+++ b/runtime/exec_utils.h
@@ -43,6 +43,29 @@
std::function<void(pid_t pid)> on_end = [](pid_t) {};
};
+struct ExecResult {
+ enum Status {
+ // Unable to get the status.
+ kUnknown = 0,
+ // Process exited normally with an exit code.
+ kExited = 1,
+ // Process terminated by a signal.
+ kSignaled = 2,
+ // Process timed out and killed.
+ kTimedOut = 3,
+ // Failed to start the process.
+ kStartFailed = 4,
+ };
+
+ Status status = kUnknown;
+
+ // The process exit code, if `status` is `kExited`, or -1.
+ int exit_code = -1;
+
+ // The signal that terminated the process, if `status` is `kSignaled`, or 0.
+ int signal = 0;
+};
+
// Wrapper on fork/execv to run a command in a subprocess.
// These spawn child processes using the environment as it was set when the single instance
// of the runtime (Runtime::Current()) was started. If no instance of the runtime was started, it
@@ -59,21 +82,19 @@
// Executes the command specified in `arg_vector` in a subprocess with a timeout.
// If `timeout_sec` is negative, blocks until the subprocess exits.
- // Returns the process exit code on success, -1 otherwise.
- // Sets `timed_out` to true if the process times out, or false otherwise.
- virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int timeout_sec,
- /*out*/ bool* timed_out,
- /*out*/ std::string* error_msg) const;
+ // Returns a structured result. If the status is not `kExited`, also returns a non-empty
+ // `error_msg`.
+ virtual ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int timeout_sec,
+ /*out*/ std::string* error_msg) const;
// Same as above, but also collects stat of the process and calls callbacks. The stat is collected
// no matter the child process succeeds or not.
- virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
- int timeout_sec,
- const ExecCallbacks& callbacks,
- /*out*/ bool* timed_out,
- /*out*/ ProcessStat* stat,
- /*out*/ std::string* error_msg) const;
+ virtual ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector,
+ int timeout_sec,
+ const ExecCallbacks& callbacks,
+ /*out*/ ProcessStat* stat,
+ /*out*/ std::string* error_msg) const;
protected:
virtual android::base::unique_fd PidfdOpen(pid_t pid) const;
diff --git a/runtime/exec_utils_test.cc b/runtime/exec_utils_test.cc
index e9bbd8f..e89180b 100644
--- a/runtime/exec_utils_test.cc
+++ b/runtime/exec_utils_test.cc
@@ -18,6 +18,7 @@
#include <sys/utsname.h>
+#include <csignal>
#include <cstring>
#include <filesystem>
#include <memory>
@@ -120,7 +121,9 @@
std::string error_msg;
// Historical note: Running on Valgrind failed due to some memory
// that leaks in thread alternate signal stacks.
- EXPECT_FALSE(exec_utils_->Exec(command, &error_msg));
+ ExecResult result = exec_utils_->ExecAndReturnResult(command, /*timeout_sec=*/-1, &error_msg);
+ EXPECT_EQ(result.status, ExecResult::kSignaled);
+ EXPECT_EQ(result.signal, SIGABRT);
EXPECT_FALSE(error_msg.empty());
}
@@ -173,9 +176,9 @@
static constexpr int kWaitSeconds = 1;
std::vector<std::string> command = SleepCommand(kSleepSeconds);
std::string error_msg;
- bool timed_out;
- ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), -1);
- EXPECT_TRUE(timed_out) << error_msg;
+ EXPECT_EQ(exec_utils_->ExecAndReturnResult(command, kWaitSeconds, &error_msg).status,
+ ExecResult::kTimedOut)
+ << error_msg;
EXPECT_THAT(error_msg, HasSubstr("timed out"));
}
@@ -184,10 +187,9 @@
static constexpr int kWaitSeconds = 5;
std::vector<std::string> command = SleepCommand(kSleepSeconds);
std::string error_msg;
- bool timed_out;
- ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), 0)
+ EXPECT_EQ(exec_utils_->ExecAndReturnResult(command, kWaitSeconds, &error_msg).status,
+ ExecResult::kExited)
<< error_msg;
- EXPECT_FALSE(timed_out);
}
TEST_P(ExecUtilsTest, ExecStat) {
@@ -195,7 +197,6 @@
command.push_back(GetBin("id"));
std::string error_msg;
- bool timed_out;
ProcessStat stat;
// The process filename is "a) b".
@@ -206,9 +207,11 @@
EXPECT_CALL(*exec_utils_, GetUptimeMs()).WillOnce(Return(1620344887ll));
EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100));
- ASSERT_EQ(exec_utils_->ExecAndReturnCode(
- command, /*timeout_sec=*/-1, ExecCallbacks(), &timed_out, &stat, &error_msg),
- 0)
+ ASSERT_EQ(
+ exec_utils_
+ ->ExecAndReturnResult(command, /*timeout_sec=*/-1, ExecCallbacks(), &stat, &error_msg)
+ .status,
+ ExecResult::kExited)
<< error_msg;
EXPECT_EQ(stat.cpu_time_ms, 990);
@@ -219,7 +222,6 @@
std::vector<std::string> command = SleepCommand(5);
std::string error_msg;
- bool timed_out;
ProcessStat stat;
EXPECT_CALL(*exec_utils_, GetProcStat(_))
@@ -230,8 +232,11 @@
EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100));
// This will always time out.
- exec_utils_->ExecAndReturnCode(
- command, /*timeout_sec=*/1, ExecCallbacks(), &timed_out, &stat, &error_msg);
+ ASSERT_EQ(
+ exec_utils_
+ ->ExecAndReturnResult(command, /*timeout_sec=*/1, ExecCallbacks(), &stat, &error_msg)
+ .status,
+ ExecResult::kTimedOut);
EXPECT_EQ(stat.cpu_time_ms, 990);
EXPECT_EQ(stat.wall_time_ms, 1007);
@@ -251,17 +256,14 @@
command.push_back(GetBin("id"));
std::string error_msg;
- bool timed_out;
-
- exec_utils_->ExecAndReturnCode(command,
- /*timeout_sec=*/-1,
- ExecCallbacks{
- .on_start = on_start.AsStdFunction(),
- .on_end = on_end.AsStdFunction(),
- },
- &timed_out,
- /*stat=*/nullptr,
- &error_msg);
+ exec_utils_->ExecAndReturnResult(command,
+ /*timeout_sec=*/-1,
+ ExecCallbacks{
+ .on_start = on_start.AsStdFunction(),
+ .on_end = on_end.AsStdFunction(),
+ },
+ /*stat=*/nullptr,
+ &error_msg);
}
INSTANTIATE_TEST_SUITE_P(AlwaysOrNeverFallback, ExecUtilsTest, testing::Values(true, false));