diff options
author | 2024-06-10 16:28:02 +0100 | |
---|---|---|
committer | 2024-06-11 12:56:52 +0000 | |
commit | 0239c6d0185d7ab4f864b63bee4a6855ca499b89 (patch) | |
tree | 1aebed8c6b26b886b73dddd6dff9aa684f4b111c | |
parent | d763615a736611f4502cce7fc2516d93737d676c (diff) |
Use process groups to manage artd's subprocesses.
This change adds a new option `new_process_group` to ExecUtils, and this
option is only used by artd to manage its subprocesses. artd creates a
process group for each child process and its descendants, and it uses
`kill(-pid, SIGKILL)` to kill the whole process group upon job
cancellation.
Before this change, ExecUtils always creates a new process group for a
child process. This behavior can be dated back to
https://android.googlesource.com/platform/art/+/5643b786dd48df1f4b89a39e6024a22d032c79d4%5E%21/.
A comment on the `setpgid` call says: "change process groups, so we
don't get reaped by ProcessManager".
We cannot find out why changing the process group was needed or how
process reaping was related to process groups at that time because there
isn't such a thing called "ProcessManager" in Android anymore. However,
we don't need this behavior today because the usage of the code that
does fork and exec has completely changed.
At the time where the original CL was submitted, the code was for the
runtime to generate a persistent boot image on /data. Today, the runtime
never generates a persistent boot image because it's considered insecure.
Today, ExecUtils is mostly used in tests. Other than tests, it's only
used in four places: Zygote, odrefresh, artd, and dexopt_chroot_setup.
- Zygote uses it to call dex2oat to create an in-memory boot image. It
doesn't make sense to keep dex2oat running when Zygote (the memfd
owner) is killed.
- odrefresh uses it to call dex2oat to create persistent boot images. We
do want to kill dex2oat when killing odrefresh.
- artd uses it to call dex2oat, profman, derive_classpath, and
odrefresh. It sets `new_process_group` to true, so it's behavior
regarding process groups is unchanged. It's a service managed by
the service manager. The service manager uses cgroups, not process
grups, to manage and kill processes, so creating new process groups
for artd's children is fine.
- dexopt_chroot_setup uses it to call apexd and linkerconfig. We do want
to kill apexd and linkerconfig when dexopt_chroot_setup is killed.
Therefore, changing this behavior is fine.
After this change, ExecUtils no longer creates a new process group for a
child process unless `new_process_group` is set to true.
Bug: 345723405
Bug: 311377497
Test: m test-art-host-gtest-art_runtime_tests
Test: m test-art-host-gtest-art_artd_tests
Test: -
1. adb shell pm compile -m speed -f -v com.android.settings
2. adb shell pm art cancel <job-id>
3. No change to the existing behavior: dex2oat is still killed.
Test: -
1. adb shell pm art pr-dexopt-job --run
2. Press Ctrl+C.
3. Both odrefresh and dex2oat are killed.
Change-Id: I3058fc68a6bd5f98617422c1eb0861648b144a51
-rw-r--r-- | artd/artd.cc | 11 | ||||
-rw-r--r-- | artd/artd_test.cc | 7 | ||||
-rw-r--r-- | runtime/exec_utils.cc | 19 | ||||
-rw-r--r-- | runtime/exec_utils.h | 9 | ||||
-rw-r--r-- | runtime/exec_utils_test.cc | 103 |
5 files changed, 115 insertions, 34 deletions
diff --git a/artd/artd.cc b/artd/artd.cc index 2a0fee8efd..094962e027 100644 --- a/artd/artd.cc +++ b/artd/artd.cc @@ -1210,7 +1210,8 @@ ScopedAStatus ArtdCancellationSignal::cancel() { std::lock_guard<std::mutex> lock(mu_); is_cancelled_ = true; for (pid_t pid : pids_) { - int res = kill_(pid, SIGKILL); + // Kill the whole process group. + int res = kill_(-pid, SIGKILL); DCHECK_EQ(res, 0); } return ScopedAStatus::ok(); @@ -1229,7 +1230,7 @@ ExecCallbacks ArtdCancellationSignal::CreateExecCallbacks() { pids_.insert(pid); // Handle cancellation signals sent before the process starts. if (is_cancelled_) { - int res = kill_(pid, SIGKILL); + int res = kill_(-pid, SIGKILL); DCHECK_EQ(res, 0); } }, @@ -1719,8 +1720,10 @@ Result<int> Artd::ExecAndReturnCode(const std::vector<std::string>& args, const ExecCallbacks& callbacks, ProcessStat* stat) const { std::string error_msg; - ExecResult result = - exec_utils_->ExecAndReturnResult(args, timeout_sec, callbacks, stat, &error_msg); + // Create a new process group so that we can kill the process subtree at once by killing the + // process group. + ExecResult result = exec_utils_->ExecAndReturnResult( + args, timeout_sec, callbacks, /*new_process_group=*/true, stat, &error_msg); if (result.status != ExecResult::kExited) { return Error() << error_msg; } diff --git a/artd/artd_test.cc b/artd/artd_test.cc index 86a7e1d750..285dd5022f 100644 --- a/artd/artd_test.cc +++ b/artd/artd_test.cc @@ -304,6 +304,7 @@ class MockExecUtils : public ExecUtils { ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector, int, const ExecCallbacks& callbacks, + bool, ProcessStat* stat, std::string*) const override { Result<int> code = DoExecAndReturnCode(arg_vector, callbacks, stat); @@ -1155,7 +1156,7 @@ TEST_F(ArtdTest, dexoptCancelledBeforeDex2oat) { callbacks.on_end(kPid); return Error(); }); - EXPECT_CALL(mock_kill_, Call(kPid, SIGKILL)); + EXPECT_CALL(mock_kill_, Call(-kPid, SIGKILL)); cancellation_signal->cancel(); @@ -1188,7 +1189,7 @@ TEST_F(ArtdTest, dexoptCancelledDuringDex2oat) { return Error(); }); - EXPECT_CALL(mock_kill_, Call(kPid, SIGKILL)).WillOnce([&](auto, auto) { + EXPECT_CALL(mock_kill_, Call(-kPid, SIGKILL)).WillOnce([&](auto, auto) { // Step 4. process_killed_cv.notify_one(); return 0; @@ -2843,7 +2844,7 @@ TEST_F(ArtdPreRebootTest, preRebootInitCancelled) { return Error(); }); - EXPECT_CALL(mock_kill_, Call(kPid, SIGKILL)).WillOnce([&](auto, auto) { + EXPECT_CALL(mock_kill_, Call(-kPid, SIGKILL)).WillOnce([&](auto, auto) { // Step 4. process_killed_cv.notify_one(); return 0; diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc index 7e04cced92..05a7cc6bdc 100644 --- a/runtime/exec_utils.cc +++ b/runtime/exec_utils.cc @@ -62,7 +62,9 @@ std::string ToCommandLine(const std::vector<std::string>& args) { // If there is a runtime (Runtime::Current != nullptr) then the subprocess is created with the // same environment that existed when the runtime was started. // Returns the process id of the child process on success, -1 otherwise. -pid_t ExecWithoutWait(const std::vector<std::string>& arg_vector, std::string* error_msg) { +pid_t ExecWithoutWait(const std::vector<std::string>& arg_vector, + bool new_process_group, + std::string* error_msg) { // Convert the args to char pointers. const char* program = arg_vector[0].c_str(); std::vector<char*> args; @@ -77,8 +79,9 @@ pid_t ExecWithoutWait(const std::vector<std::string>& arg_vector, std::string* e if (pid == 0) { // no allocation allowed between fork and exec - // change process groups, so we don't get reaped by ProcessManager - setpgid(0, 0); + if (new_process_group) { + setpgid(0, 0); + } // (b/30160149): protect subprocesses from modifications to LD_LIBRARY_PATH, etc. // Use the snapshot of the environment from the time the runtime was created. @@ -250,12 +253,18 @@ int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector, ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector, int timeout_sec, std::string* error_msg) const { - return ExecAndReturnResult(arg_vector, timeout_sec, ExecCallbacks(), /*stat=*/nullptr, error_msg); + return ExecAndReturnResult(arg_vector, + timeout_sec, + ExecCallbacks(), + /*new_process_group=*/false, + /*stat=*/nullptr, + error_msg); } ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_vector, int timeout_sec, const ExecCallbacks& callbacks, + bool new_process_group, /*out*/ ProcessStat* stat, /*out*/ std::string* error_msg) const { if (timeout_sec > INT_MAX / 1000) { @@ -264,7 +273,7 @@ ExecResult ExecUtils::ExecAndReturnResult(const std::vector<std::string>& arg_ve } // Start subprocess. - pid_t pid = ExecWithoutWait(arg_vector, error_msg); + pid_t pid = ExecWithoutWait(arg_vector, new_process_group, error_msg); if (pid == -1) { return {.status = ExecResult::kStartFailed}; } diff --git a/runtime/exec_utils.h b/runtime/exec_utils.h index 8e3e3df15d..ebcc2e4e66 100644 --- a/runtime/exec_utils.h +++ b/runtime/exec_utils.h @@ -94,11 +94,16 @@ class EXPORT ExecUtils { int timeout_sec, /*out*/ std::string* error_msg) const; - // Same as above, but also collects stat of the process and calls callbacks. The stat is collected - // no matter the child process succeeds or not. + // Same as above, but also collects stat of the process, calls callbacks, and supports creating a + // new process group. The stat is collected no matter the child process succeeds or not. + // + // Programs used by artd (odrefresh, dex2oat, profman) should NOT set `new_process_group` to true + // when spawning child processes because artd needs to kill an entire process subtree by killing + // the process group. virtual ExecResult ExecAndReturnResult(const std::vector<std::string>& arg_vector, int timeout_sec, const ExecCallbacks& callbacks, + bool new_process_group, /*out*/ ProcessStat* stat, /*out*/ std::string* error_msg) const; diff --git a/runtime/exec_utils_test.cc b/runtime/exec_utils_test.cc index e3b1dc5713..e1fc6272d5 100644 --- a/runtime/exec_utils_test.cc +++ b/runtime/exec_utils_test.cc @@ -17,8 +17,10 @@ #include "exec_utils.h" #include <sys/utsname.h> +#include <unistd.h> #include <csignal> +#include <cstdio> #include <cstring> #include <filesystem> #include <memory> @@ -221,11 +223,15 @@ TEST_P(ExecUtilsTest, ExecStat) { EXPECT_CALL(*exec_utils_, DoGetUptimeMs()).WillOnce(Return(1620344887ll)); EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100)); - ASSERT_EQ( - exec_utils_ - ->ExecAndReturnResult(command, /*timeout_sec=*/-1, ExecCallbacks(), &stat, &error_msg) - .status, - ExecResult::kExited) + ASSERT_EQ(exec_utils_ + ->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks(), + /*new_process_group=*/false, + &stat, + &error_msg) + .status, + ExecResult::kExited) << error_msg; EXPECT_EQ(stat.cpu_time_ms, 990); @@ -247,11 +253,15 @@ TEST_P(ExecUtilsTest, ExecStatNoStartTime) { EXPECT_CALL(*exec_utils_, DoGetUptimeMs()).WillOnce(Return(1620344887ll)); EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100)); - ASSERT_EQ( - exec_utils_ - ->ExecAndReturnResult(command, /*timeout_sec=*/-1, ExecCallbacks(), &stat, &error_msg) - .status, - ExecResult::kExited) + ASSERT_EQ(exec_utils_ + ->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks(), + /*new_process_group=*/false, + &stat, + &error_msg) + .status, + ExecResult::kExited) << error_msg; EXPECT_EQ(stat.cpu_time_ms, 0); @@ -268,11 +278,15 @@ TEST_P(ExecUtilsTest, ExecStatNoUptime) { EXPECT_CALL(*exec_utils_, DoGetUptimeMs()) .WillOnce(Return(Result<int64_t>(Errorf("Failed to get uptime")))); - ASSERT_EQ( - exec_utils_ - ->ExecAndReturnResult(command, /*timeout_sec=*/-1, ExecCallbacks(), &stat, &error_msg) - .status, - ExecResult::kExited) + ASSERT_EQ(exec_utils_ + ->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks(), + /*new_process_group=*/false, + &stat, + &error_msg) + .status, + ExecResult::kExited) << error_msg; EXPECT_EQ(stat.cpu_time_ms, 0); @@ -293,11 +307,15 @@ TEST_P(ExecUtilsTest, ExecStatFailed) { EXPECT_CALL(*exec_utils_, GetTicksPerSec()).WillOnce(Return(100)); // This will always time out. - ASSERT_EQ( - exec_utils_ - ->ExecAndReturnResult(command, /*timeout_sec=*/1, ExecCallbacks(), &stat, &error_msg) - .status, - ExecResult::kTimedOut); + ASSERT_EQ(exec_utils_ + ->ExecAndReturnResult(command, + /*timeout_sec=*/1, + ExecCallbacks(), + /*new_process_group=*/false, + &stat, + &error_msg) + .status, + ExecResult::kTimedOut); EXPECT_EQ(stat.cpu_time_ms, 990); EXPECT_EQ(stat.wall_time_ms, 1007); @@ -323,6 +341,51 @@ TEST_P(ExecUtilsTest, ExecCallbacks) { .on_start = on_start.AsStdFunction(), .on_end = on_end.AsStdFunction(), }, + /*new_process_group=*/false, + /*stat=*/nullptr, + &error_msg); +} + +TEST_P(ExecUtilsTest, ExecNewProcessGroupTrue) { + auto on_end = [](pid_t pid) { + pid_t pgid = getpgid(pid); + ASSERT_GE(pgid, 0) << strerror(errno); + ASSERT_EQ(pgid, pid); + }; + + std::vector<std::string> command; + command.push_back(GetBin("id")); + + std::string error_msg; + exec_utils_->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks{ + .on_end = on_end, + }, + /*new_process_group=*/true, + /*stat=*/nullptr, + &error_msg); +} + +TEST_P(ExecUtilsTest, ExecNewProcessGroupFalse) { + auto on_end = [](pid_t pid) { + pid_t pgid = getpgid(pid); + ASSERT_GE(pgid, 0) << strerror(errno); + pid_t parent_pgid = getpgid(0); + ASSERT_GE(parent_pgid, 0) << strerror(errno); + ASSERT_EQ(pgid, parent_pgid); + }; + + std::vector<std::string> command; + command.push_back(GetBin("id")); + + std::string error_msg; + exec_utils_->ExecAndReturnResult(command, + /*timeout_sec=*/-1, + ExecCallbacks{ + .on_end = on_end, + }, + /*new_process_group=*/false, /*stat=*/nullptr, &error_msg); } |