diff options
| author | 2024-06-06 10:38:58 +0100 | |
|---|---|---|
| committer | 2024-06-06 17:01:16 +0100 | |
| commit | 4f644c3744f6450a298be14cf9c5718acfe61385 (patch) | |
| tree | 479bea533525f2c1a208e5b6ded132771ecec3ec | |
| parent | 845cfa9fc43032ba14a564d830f3893dc6e65a5b (diff) | |
Make teardown faster and more reliable.
Before this change, we only trigger GC and sleep 5 seconds, hoping that
processes in chroot exit within that 5 seconds.
After this change, we use pidfd to wait for processes to exit, so
teardown can be performed as soon as all processes have exited, making
teardown considerably faster.
Also, if the processes don't exit within 5 seconds, we kill them, making
teardown more reliable.
Only one more SELinux permission is needed, which is the permission to
send `sigkill` to artd and its subprocesses. system_server already has
all the other permissions to perform the operations used in this CL.
1. system_server has supplementary group "readproc" (3009), to list all
pids in "/proc", bypassing "hidepid=2" (see proc(5)).
- https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/com/android/internal/os/ZygoteInit.java;l=658;drc=7d3ffbae618e9e728644a96647ed709bf39ae759
2. system_server can read /proc/<pid>/* for all domains.
- https://cs.android.com/android/platform/superproject/main/+/main:system/sepolicy/private/system_server.te;l=213;drc=ca2f3851afaee866d37caae16598b3d5c20844d4
3. system_server has CAP_SYS_PTRACE, to read the "/proc/<pid>/exe" link
of anyprocess (see proc(5)).
- https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/com/android/internal/os/ZygoteInit.java;l=635;drc=7d3ffbae618e9e728644a96647ed709bf39ae759
- https://cs.android.com/android/platform/superproject/main/+/main:system/sepolicy/private/system_server.te;l=145;drc=ca2f3851afaee866d37caae16598b3d5c20844d4
4. system_server has CAP_KILL, to kill processes that belong to other
users (see kill(2)).
- https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/com/android/internal/os/ZygoteInit.java;l=628;drc=7d3ffbae618e9e728644a96647ed709bf39ae759
- https://cs.android.com/android/platform/superproject/main/+/main:system/sepolicy/private/system_server.te;l=138;drc=ca2f3851afaee866d37caae16598b3d5c20844d4
Bug: 311377497
Test: m test-art-host-gtest-art_libarttools_tests
Test: atest art_standalone_libarttools_tests --iterations 10
Test: adb shell pm art pr-dexopt-job --test
Test: Run and cancel Pre-reboot Dexopt multiple times.
Change-Id: I1e41cd71402944e31b33e410ac1635766afe55c4
| -rw-r--r-- | libartbase/base/pidfd.h | 45 | ||||
| -rw-r--r-- | libartservice/service/java/com/android/server/art/ArtJni.java | 34 | ||||
| -rw-r--r-- | libartservice/service/java/com/android/server/art/prereboot/PreRebootDriver.java | 72 | ||||
| -rw-r--r-- | libartservice/service/java/com/android/server/art/prereboot/PreRebootManager.java | 2 | ||||
| -rw-r--r-- | libartservice/service/native/service.cc | 15 | ||||
| -rw-r--r-- | libarttools/art_exec_test.cc | 66 | ||||
| -rw-r--r-- | libarttools/include/tools/tools.h | 6 | ||||
| -rw-r--r-- | libarttools/testing.h | 93 | ||||
| -rw-r--r-- | libarttools/tools.cc | 123 | ||||
| -rw-r--r-- | libarttools/tools_test.cc | 107 | ||||
| -rw-r--r-- | runtime/exec_utils.cc | 17 |
11 files changed, 464 insertions, 116 deletions
diff --git a/libartbase/base/pidfd.h b/libartbase/base/pidfd.h new file mode 100644 index 0000000000..d209d931c8 --- /dev/null +++ b/libartbase/base/pidfd.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_LIBARTBASE_BASE_PIDFD_H_ +#define ART_LIBARTBASE_BASE_PIDFD_H_ + +#include <cstdint> +#include <cstdio> + +#include "android-base/unique_fd.h" + +#ifdef __BIONIC__ +#include <sys/pidfd.h> +#endif + +namespace art { + +[[maybe_unused]] static android::base::unique_fd PidfdOpen(pid_t pid, uint32_t flags) { +#ifdef __BIONIC__ + return android::base::unique_fd(pidfd_open(pid, flags)); +#else + // There is no glibc wrapper for pidfd_open. +#ifndef SYS_pidfd_open + constexpr int SYS_pidfd_open = 434; +#endif + return android::base::unique_fd(syscall(SYS_pidfd_open, pid, flags)); +#endif +} + +} // namespace art + +#endif // ART_LIBARTBASE_BASE_PIDFD_H_ diff --git a/libartservice/service/java/com/android/server/art/ArtJni.java b/libartservice/service/java/com/android/server/art/ArtJni.java index 873b5642c5..62b53dd6c8 100644 --- a/libartservice/service/java/com/android/server/art/ArtJni.java +++ b/libartservice/service/java/com/android/server/art/ArtJni.java @@ -25,6 +25,8 @@ import androidx.annotation.RequiresApi; import dalvik.system.VMRuntime; +import java.io.IOException; + /** * JNI methods for ART Service with wrappers. * @@ -129,10 +131,42 @@ public class ArtJni { return null; } + /** + * Waits for processes whose executable is in the given directory to exit, and kills them if + * they don't exit within the timeout. + * + * Note that this method only checks processes' executable paths, not their open files. If the + * executable of a process is outside of the given directory but the process opens a file in + * that directory, this method doesn't handle it. + * + * After killing, the method waits another round with the given timeout. Theoretically, this + * method can take at most {@code 2 * timeoutMs}. However, the second round should be pretty + * fast in practice. + * + * This method assumes that no new process is started from an executable in the given directory + * while the method is running. It is the callers responsibility to make sure that this + * assumption holds. + * + * @throws IllegalArgumentException if {@code timeoutMs} is negative + * @throws IOException if the operation fails + */ + public static Void ensureNoProcessInDir(@NonNull String dir, int timeoutMs) throws IOException { + if (GlobalInjector.getInstance().isPreReboot()) { + // We don't need this for Pre-reboot Dexopt. + throw new UnsupportedOperationException(); + } + ensureNoProcessInDirNative(dir, timeoutMs); + // Return a placeholder value to make this method easier to mock. There is no good way to + // mock a method that is both void and static, due to the poor design of Mockito API. + return null; + } + @Nullable private static native String validateDexPathNative(@NonNull String dexPath); @Nullable private static native String validateClassLoaderContextNative( @NonNull String dexPath, @NonNull String classLoaderContext); @NonNull private static native String getGarbageCollectorNative(); private static native void setPropertyNative(@NonNull String key, @NonNull String value); + private static native void ensureNoProcessInDirNative(@NonNull String dir, int timeoutMs) + throws IOException; } diff --git a/libartservice/service/java/com/android/server/art/prereboot/PreRebootDriver.java b/libartservice/service/java/com/android/server/art/prereboot/PreRebootDriver.java index a042628846..b20eefd7d9 100644 --- a/libartservice/service/java/com/android/server/art/prereboot/PreRebootDriver.java +++ b/libartservice/service/java/com/android/server/art/prereboot/PreRebootDriver.java @@ -33,6 +33,7 @@ import android.system.Os; import androidx.annotation.RequiresApi; import com.android.internal.annotations.VisibleForTesting; +import com.android.server.art.ArtJni; import com.android.server.art.ArtManagerLocal; import com.android.server.art.ArtModuleServiceInitializer; import com.android.server.art.ArtdRefCache; @@ -103,10 +104,17 @@ public class PreRebootDriver { } catch (ReflectiveOperationException | IOException | ErrnoException e) { AsLog.e("Failed to run pre-reboot dexopt", e); } finally { - // No need to pass `mapSnapshotsForOta` because `setUp` stores this information in a - // temp file. - tearDown(false /* throwing */); - statsReporter.recordJobEnded(success); + try { + // No need to pass `mapSnapshotsForOta` because `setUp` stores this information in a + // temp file. + tearDown(); + } catch (RemoteException e) { + Utils.logArtdException(e); + } catch (ServiceSpecificException | IOException e) { + AsLog.e("Failed to tear down chroot", e); + } finally { + statsReporter.recordJobEnded(success); + } } return false; } @@ -122,12 +130,16 @@ public class PreRebootDriver { // dexopt only one package, and that can easily make the test fail the CTS quality // requirement on test duration (<30s). teardownAttempted = true; - tearDown(true /* throwing */); - } catch (RemoteException e) { + tearDown(); + } catch (RemoteException | IOException e) { throw new AssertionError("Unexpected exception", e); } finally { if (!teardownAttempted) { - tearDown(false /* throwing */); + try { + tearDown(); + } catch (RemoteException | IOException | RuntimeException e) { + // Do nothing. + } } } } @@ -142,42 +154,22 @@ public class PreRebootDriver { return true; } - /** @param throwing Throws {@link RuntimeException} on failure. */ - private void tearDown(boolean throwing) { + private void tearDown() throws RemoteException, IOException { // In general, the teardown unmounts apexes and partitions, and open files can keep the // mounts busy so that they cannot be unmounted. Therefore, a running Pre-reboot artd // process can prevent the teardown from succeeding. It's managed by the service manager, - // and there isn't a reliable API to kill it, so we have to kill it by triggering GC and - // finalization, with sleep and retry mechanism. - Throwable lastThrowable = null; - for (int numRetries = 3; numRetries > 0;) { - try { - Runtime.getRuntime().gc(); - Runtime.getRuntime().runFinalization(); - // Wait for the service manager to shut down artd. The shutdown is asynchronous. - Utils.sleep(5000); - mInjector.getDexoptChrootSetup().tearDown(); - return; - } catch (RemoteException e) { - Utils.logArtdException(e); - lastThrowable = e; - } catch (ServiceSpecificException e) { - AsLog.e("Failed to tear down chroot", e); - lastThrowable = e; - } catch (IllegalStateException e) { - // Not expected, but we still want retries in such an extreme case. - AsLog.wtf("Unexpected exception", e); - lastThrowable = e; - } - - if (--numRetries > 0) { - AsLog.i("Retrying...."); - Utils.sleep(30000); - } - } - if (throwing) { - throw Utils.toRuntimeException(lastThrowable); - } + // and there isn't a reliable API to kill it. We deal with it in two steps: + // 1. Trigger GC and finalization. The service manager should gracefully shut it down, since + // there is no reference to it as this point. + // 2. Call `ensureNoProcessInDir` to wait for it to exit. If it doesn't exit in 5 seconds, + // `ensureNoProcessInDir` will then kill it. + Runtime.getRuntime().gc(); + Runtime.getRuntime().runFinalization(); + // At this point, no process other than `artd` is expected to be running. `runFromChroot` + // blocks on `artd` calls, even upon cancellation, and `artd` in turn waits for child + // processes to exit, even if they are killed due to the cancellation. + ArtJni.ensureNoProcessInDir(CHROOT_DIR, 5000 /* timeoutMs */); + mInjector.getDexoptChrootSetup().tearDown(); } private void runFromChroot(@NonNull CancellationSignal cancellationSignal) diff --git a/libartservice/service/java/com/android/server/art/prereboot/PreRebootManager.java b/libartservice/service/java/com/android/server/art/prereboot/PreRebootManager.java index 2d9b087914..40cb61576b 100644 --- a/libartservice/service/java/com/android/server/art/prereboot/PreRebootManager.java +++ b/libartservice/service/java/com/android/server/art/prereboot/PreRebootManager.java @@ -116,7 +116,7 @@ public class PreRebootManager implements PreRebootManagerInterface { callbackExecutor.shutdown(); try { // Make sure we have no running threads when we tear down. - callbackExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.SECONDS); + callbackExecutor.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { AsLog.wtf("Interrupted", e); } diff --git a/libartservice/service/native/service.cc b/libartservice/service/native/service.cc index 35296a7c34..e97d871dac 100644 --- a/libartservice/service/native/service.cc +++ b/libartservice/service/native/service.cc @@ -30,6 +30,7 @@ #include "nativehelper/JNIHelp.h" #include "nativehelper/utils.h" #include "runtime.h" +#include "tools/tools.h" namespace art { namespace service { @@ -150,5 +151,19 @@ extern "C" JNIEXPORT void JNICALL Java_com_android_server_art_ArtJni_setProperty } } +extern "C" JNIEXPORT void JNICALL Java_com_android_server_art_ArtJni_ensureNoProcessInDirNative( + JNIEnv* env, jobject, jstring j_dir, jint j_timeout_ms) { + if (j_timeout_ms < 0) { + jniThrowExceptionFmt( + env, "java/lang/IllegalArgumentException", "Negative timeout '%d'", j_timeout_ms); + return; + } + std::string dir(GET_UTF_OR_RETURN_VOID(env, j_dir)); + if (Result<void> result = tools::EnsureNoProcessInDir(dir, j_timeout_ms, /*try_kill=*/true); + !result.ok()) { + jniThrowException(env, "java/io/IOException", result.error().message().c_str()); + } +} + } // namespace service } // namespace art diff --git a/libarttools/art_exec_test.cc b/libarttools/art_exec_test.cc index 63f237c342..c1f15dc187 100644 --- a/libarttools/art_exec_test.cc +++ b/libarttools/art_exec_test.cc @@ -17,21 +17,13 @@ #include <sys/capability.h> #include <sys/resource.h> #include <sys/types.h> -#include <sys/wait.h> -#include <unistd.h> -#include <csignal> -#include <filesystem> -#include <functional> #include <string> -#include <utility> #include "android-base/file.h" #include "android-base/logging.h" -#include "android-base/scopeguard.h" #include "android-base/strings.h" #include "base/common_art_test.h" -#include "base/file_utils.h" #include "base/globals.h" #include "base/macros.h" #include "base/os.h" @@ -40,16 +32,16 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "system/thread_defs.h" +#include "testing.h" #ifdef ART_TARGET_ANDROID #include "android-modules-utils/sdk_level.h" #endif namespace art { +namespace tools { namespace { -using ::android::base::make_scope_guard; -using ::android::base::ScopeGuard; using ::android::base::Split; using ::testing::Contains; using ::testing::ElementsAre; @@ -59,45 +51,6 @@ using ::testing::Not; constexpr uid_t kRoot = 0; constexpr uid_t kNobody = 9999; -std::string GetArtBin(const std::string& name) { - return ART_FORMAT("{}/bin/{}", GetArtRoot(), name); -} - -std::string GetBin(const std::string& name) { - return ART_FORMAT("{}/bin/{}", GetAndroidRoot(), name); -} - -// Executes the command, waits for it to finish, and keeps it in a waitable state until the current -// scope exits. -std::pair<pid_t, ScopeGuard<std::function<void()>>> ScopedExecAndWait( - std::vector<std::string>& args) { - std::vector<char*> execv_args; - execv_args.reserve(args.size() + 1); - for (std::string& arg : args) { - execv_args.push_back(arg.data()); - } - execv_args.push_back(nullptr); - - pid_t pid = fork(); - if (pid == 0) { - execv(execv_args[0], execv_args.data()); - UNREACHABLE(); - } else if (pid > 0) { - siginfo_t info; - CHECK_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED | WNOWAIT)), 0); - CHECK_EQ(info.si_code, CLD_EXITED); - CHECK_EQ(info.si_status, 0); - std::function<void()> cleanup([=] { - siginfo_t info; - CHECK_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED)), 0); - }); - return std::make_pair(pid, make_scope_guard(std::move(cleanup))); - } else { - LOG(FATAL) << "Failed to call fork"; - UNREACHABLE(); - } -} - // Grants the current process the given root capability. void SetCap(cap_flag_t flag, cap_value_t value) { ScopedCap cap(cap_get_proc()); @@ -156,7 +109,7 @@ TEST_F(ArtExecTest, SetTaskProfiles) { GetBin("sh"), "-c", "cat /proc/self/cgroup > " + filename}; - auto [pid, scope_guard] = ScopedExecAndWait(args); + auto [pid, scope_guard] = ScopedExec(args, /*wait=*/true); std::string cgroup; ASSERT_TRUE(android::base::ReadFileToString(filename, &cgroup)); EXPECT_THAT(cgroup, HasSubstr(":cpuset:/foreground\n")); @@ -164,7 +117,7 @@ TEST_F(ArtExecTest, SetTaskProfiles) { TEST_F(ArtExecTest, SetPriority) { std::vector<std::string> args{art_exec_bin_, "--set-priority=background", "--", GetBin("true")}; - auto [pid, scope_guard] = ScopedExecAndWait(args); + auto [pid, scope_guard] = ScopedExec(args, /*wait=*/true); EXPECT_EQ(getpriority(PRIO_PROCESS, pid), ANDROID_PRIORITY_BACKGROUND); } @@ -180,13 +133,13 @@ TEST_F(ArtExecTest, DropCapabilities) { // inherited root capability: CAP_FOWNER). { std::vector<std::string> args{art_exec_bin_, "--", GetBin("true")}; - auto [pid, scope_guard] = ScopedExecAndWait(args); + auto [pid, scope_guard] = ScopedExec(args, /*wait=*/true); ASSERT_TRUE(GetCap(pid, CAP_EFFECTIVE, CAP_FOWNER)); } { std::vector<std::string> args{art_exec_bin_, "--drop-capabilities", "--", GetBin("true")}; - auto [pid, scope_guard] = ScopedExecAndWait(args); + auto [pid, scope_guard] = ScopedExec(args, /*wait=*/true); EXPECT_FALSE(GetCap(pid, CAP_EFFECTIVE, CAP_FOWNER)); } } @@ -218,7 +171,7 @@ TEST_F(ArtExecTest, CloseFds) { file3->Fd(), filename)}; - ScopedExecAndWait(args); + ScopedExec(args, /*wait=*/true); std::string open_fds; ASSERT_TRUE(android::base::ReadFileToString(filename, &open_fds)); @@ -235,7 +188,7 @@ TEST_F(ArtExecTest, Env) { std::vector<std::string> args{ art_exec_bin_, "--env=FOO=BAR", "--", GetBin("sh"), "-c", "env > " + filename}; - ScopedExecAndWait(args); + ScopedExec(args, /*wait=*/true); std::string envs; ASSERT_TRUE(android::base::ReadFileToString(filename, &envs)); @@ -256,7 +209,7 @@ TEST_F(ArtExecTest, ProcessNameSuffix) { "/proc/self/cmdline", filename}; - ScopedExecAndWait(args); + ScopedExec(args, /*wait=*/true); std::string cmdline; ASSERT_TRUE(android::base::ReadFileToString(filename, &cmdline)); @@ -265,4 +218,5 @@ TEST_F(ArtExecTest, ProcessNameSuffix) { } } // namespace +} // namespace tools } // namespace art diff --git a/libarttools/include/tools/tools.h b/libarttools/include/tools/tools.h index f536e030c4..9399bcd49d 100644 --- a/libarttools/include/tools/tools.h +++ b/libarttools/include/tools/tools.h @@ -17,6 +17,7 @@ #ifndef ART_LIBARTTOOLS_INCLUDE_TOOLS_TOOLS_H_ #define ART_LIBARTTOOLS_INCLUDE_TOOLS_TOOLS_H_ +#include <cstdint> #include <string> #include <string_view> #include <vector> @@ -56,6 +57,11 @@ android::base::Result<std::vector<android::fs_mgr::FstabEntry>> GetProcMountsAnc android::base::Result<std::vector<android::fs_mgr::FstabEntry>> GetProcMountsDescendantsOfPath( std::string_view path); +// See `ArtJni.ensureNoProcessInDir`. +android::base::Result<void> EnsureNoProcessInDir(const std::string& dir, + uint32_t timeout_ms, + bool try_kill); + } // namespace tools } // namespace art diff --git a/libarttools/testing.h b/libarttools/testing.h new file mode 100644 index 0000000000..eaa8523a1b --- /dev/null +++ b/libarttools/testing.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_LIBARTTOOLS_TESTING_H_ +#define ART_LIBARTTOOLS_TESTING_H_ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <functional> +#include <string> +#include <utility> +#include <vector> + +#include "android-base/logging.h" +#include "android-base/scopeguard.h" +#include "base/file_utils.h" +#include "base/globals.h" +#include "base/macros.h" + +namespace art { +namespace tools { + +using ::android::base::make_scope_guard; +using ::android::base::ScopeGuard; + +[[maybe_unused]] static std::string GetArtBin(const std::string& name) { + CHECK(kIsTargetAndroid); + return ART_FORMAT("{}/bin/{}", GetArtRoot(), name); +} + +[[maybe_unused]] static std::string GetBin(const std::string& name) { + CHECK(kIsTargetAndroid); + return ART_FORMAT("{}/bin/{}", GetAndroidRoot(), name); +} + +// Executes the command. If the `wait` is true, waits for the process to finish and keeps it in a +// waitable state; otherwise, returns immediately after fork. When the current scope exits, destroys +// the process. +[[maybe_unused]] static std::pair<pid_t, ScopeGuard<std::function<void()>>> ScopedExec( + std::vector<std::string>& args, bool wait) { + std::vector<char*> execv_args; + execv_args.reserve(args.size() + 1); + for (std::string& arg : args) { + execv_args.push_back(arg.data()); + } + execv_args.push_back(nullptr); + + pid_t pid = fork(); + if (pid == 0) { + execv(execv_args[0], execv_args.data()); + UNREACHABLE(); + } else if (pid > 0) { + if (wait) { + siginfo_t info; + CHECK_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED | WNOWAIT)), 0); + CHECK_EQ(info.si_code, CLD_EXITED); + CHECK_EQ(info.si_status, 0); + } + std::function<void()> cleanup([=] { + siginfo_t info; + if (!wait) { + CHECK_EQ(kill(pid, SIGKILL), 0); + } + CHECK_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED)), 0); + }); + return std::make_pair(pid, make_scope_guard(std::move(cleanup))); + } else { + LOG(FATAL) << "Failed to call fork"; + UNREACHABLE(); + } +} + +} // namespace tools +} // namespace art + +#endif // ART_LIBARTTOOLS_TESTING_H_ diff --git a/libarttools/tools.cc b/libarttools/tools.cc index e355f6d1fa..0dadfde564 100644 --- a/libarttools/tools.cc +++ b/libarttools/tools.cc @@ -18,21 +18,34 @@ #include <errno.h> #include <fnmatch.h> +#include <poll.h> +#include <signal.h> +#include <stdio.h> +#include <unistd.h> #include <algorithm> +#include <cstdint> +#include <ctime> #include <filesystem> #include <functional> #include <regex> #include <string> #include <string_view> #include <system_error> +#include <unordered_map> +#include <utility> #include <vector> +#include "android-base/errors.h" +#include "android-base/file.h" #include "android-base/function_ref.h" #include "android-base/logging.h" +#include "android-base/process.h" #include "android-base/result.h" #include "android-base/strings.h" +#include "android-base/unique_fd.h" #include "base/macros.h" +#include "base/pidfd.h" #include "fstab/fstab.h" namespace art { @@ -40,15 +53,25 @@ namespace tools { namespace { +using ::android::base::AllPids; using ::android::base::ConsumeSuffix; using ::android::base::function_ref; +using ::android::base::ReadFileToString; +using ::android::base::Readlink; using ::android::base::Result; using ::android::base::StartsWith; +using ::android::base::unique_fd; using ::android::fs_mgr::Fstab; using ::android::fs_mgr::FstabEntry; using ::android::fs_mgr::ReadFstabFromProcMounts; using ::std::placeholders::_1; +uint64_t MilliTime() { + timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000) + now.tv_nsec / UINT64_C(1000000); +} + // Returns true if `path_prefix` matches `pattern` or can be a prefix of a path that matches // `pattern` (i.e., `path_prefix` represents a directory that may contain a file whose path matches // `pattern`). @@ -194,5 +217,105 @@ Result<std::vector<FstabEntry>> GetProcMountsDescendantsOfPath(std::string_view [&](std::string_view mount_point) { return PathStartsWith(mount_point, path); }); } +Result<void> EnsureNoProcessInDir(const std::string& dir, uint32_t timeout_ms, bool try_kill) { + // Pairs of pid and process name, indexed by pidfd. + std::unordered_map<int, std::pair<pid_t, std::string>> running_processes; + std::vector<struct pollfd> pollfds; + std::vector<unique_fd> pidfds; + + for (pid_t pid : AllPids()) { + std::string exe; + if (!Readlink(ART_FORMAT("/proc/{}/exe", pid), &exe)) { + // The caller may not have access to all processes. That's okay. When using this method, we + // must grant the caller access to the processes that we are interested in. + continue; + } + + if (PathStartsWith(exe, dir)) { + unique_fd pidfd = PidfdOpen(pid, /*flags=*/0); + if (pidfd < 0) { + if (errno == ESRCH) { + // The process has gone now. + continue; + } + return ErrnoErrorf("Failed to pidfd_open {}", pid); + } + + std::string name; + if (!ReadFileToString(ART_FORMAT("/proc/{}/comm", pid), &name)) { + PLOG(WARNING) << "Failed to get process name for pid " << pid; + } + size_t pos = name.find_first_of("\n\0"); + if (pos != std::string::npos) { + name.resize(pos); + } + LOG(INFO) << ART_FORMAT( + "Process '{}' (pid: {}) is still running. Waiting for it to exit", name, pid); + + struct pollfd& pollfd = pollfds.emplace_back(); + pollfd.fd = pidfd.get(); + pollfd.events = POLLIN; + + running_processes[pidfd.get()] = std::make_pair(pid, std::move(name)); + pidfds.push_back(std::move(pidfd)); + } + } + + auto wait_for_processes = [&]() -> Result<void> { + uint64_t start_time_ms = MilliTime(); + uint64_t remaining_timeout_ms = timeout_ms; + while (!running_processes.empty() && remaining_timeout_ms > 0) { + int poll_ret = TEMP_FAILURE_RETRY(poll(pollfds.data(), pollfds.size(), remaining_timeout_ms)); + if (poll_ret < 0) { + return ErrnoErrorf("Failed to poll pidfd's"); + } + if (poll_ret == 0) { + // Timeout. + break; + } + uint64_t elapsed_time_ms = MilliTime() - start_time_ms; + for (struct pollfd& pollfd : pollfds) { + if (pollfd.fd < 0) { + continue; + } + if ((pollfd.revents & POLLIN) != 0) { + const auto& [pid, name] = running_processes[pollfd.fd]; + LOG(INFO) << ART_FORMAT( + "Process '{}' (pid: {}) exited in {}ms", name, pid, elapsed_time_ms); + running_processes.erase(pollfd.fd); + pollfd.fd = -1; + } + } + remaining_timeout_ms = timeout_ms - elapsed_time_ms; + } + return {}; + }; + + OR_RETURN(wait_for_processes()); + + bool process_killed = false; + for (const auto& [pidfd, pair] : running_processes) { + const auto& [pid, name] = pair; + LOG(ERROR) << ART_FORMAT( + "Process '{}' (pid: {}) is still running after {}ms", name, pid, timeout_ms); + if (try_kill) { + LOG(INFO) << ART_FORMAT("Killing '{}' (pid: {})", name, pid); + if (kill(pid, SIGKILL) != 0) { + PLOG(ERROR) << ART_FORMAT("Failed to kill '{}' (pid: {})", name, pid); + } + process_killed = true; + } + } + + if (process_killed) { + // Wait another round for processes to exit after being killed. + OR_RETURN(wait_for_processes()); + } + if (!running_processes.empty()) { + return Errorf("Some process(es) are still running after {}ms", timeout_ms); + } + return {}; +} + } // namespace tools } // namespace art diff --git a/libarttools/tools_test.cc b/libarttools/tools_test.cc index 43fa3f42c4..fe2d6d4868 100644 --- a/libarttools/tools_test.cc +++ b/libarttools/tools_test.cc @@ -16,26 +16,32 @@ #include "tools/tools.h" -#include <algorithm> +#include <stdlib.h> +#include <unistd.h> + #include <filesystem> -#include <iterator> #include "android-base/file.h" +#include "android-base/result.h" #include "base/common_art_test.h" +#include "base/globals.h" +#include "base/time_utils.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "testing.h" namespace art { namespace tools { namespace { +using ::android::base::Result; using ::android::base::WriteStringToFile; using ::testing::UnorderedElementsAre; -void CreateFile(const std::string& filename) { +void CreateFile(const std::string& filename, const std::string& content = "") { std::filesystem::path path(filename); std::filesystem::create_directories(path.parent_path()); - ASSERT_TRUE(WriteStringToFile(/*content=*/"", filename)); + ASSERT_TRUE(WriteStringToFile(content, filename)); } class ArtToolsTest : public CommonArtTest { @@ -157,6 +163,99 @@ TEST_F(ArtToolsTest, PathStartsWith) { EXPECT_FALSE(PathStartsWith("/", "/a")); } +class ArtToolsEnsureNoProcessInDirTest : public ArtToolsTest { + protected: + void SetUp() override { + ArtToolsTest::SetUp(); + + related_dir_ = scratch_path_ + "/related"; + unrelated_dir_ = scratch_path_ + "/unrelated"; + + std::string sleep_bin = GetSleepBin(); + if (sleep_bin.empty()) { + GTEST_SKIP() << "'sleep' is not available"; + } + + std::filesystem::create_directories(related_dir_); + std::filesystem::create_directories(unrelated_dir_); + std::filesystem::copy(sleep_bin, related_dir_ + "/sleep"); + std::filesystem::copy(sleep_bin, unrelated_dir_ + "/sleep"); + } + + std::string related_dir_; + std::string unrelated_dir_; + + private: + std::string GetSleepBin() { + if constexpr (kIsTargetAndroid) { + return GetBin("sleep"); + } + if (access("/usr/bin/sleep", X_OK) == 0) { + return "/usr/bin/sleep"; + } + return ""; + } +}; + +TEST_F(ArtToolsEnsureNoProcessInDirTest, WaitsProcesses) { + std::vector<std::string> args_1{related_dir_ + "/sleep", "0.3"}; + auto [pid_1, scope_guard_1] = ScopedExec(args_1, /*wait=*/false); + std::vector<std::string> args_2{unrelated_dir_ + "/sleep", "2"}; + auto [pid_2, scope_guard_2] = ScopedExec(args_2, /*wait=*/false); + NanoSleep(100'000'000); // Wait for child processes to exec. + + ASSERT_RESULT_OK(EnsureNoProcessInDir(related_dir_, /*timeout_ms=*/5000, /*try_kill=*/false)); + + // Check the current status of the process with `WNOHANG`. The related process should have exited, + // so `si_signo` should be `SIGCHLD`. + siginfo_t info; + ASSERT_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid_1, &info, WEXITED | WNOWAIT | WNOHANG)), 0); + EXPECT_EQ(info.si_signo, SIGCHLD); + EXPECT_EQ(info.si_code, CLD_EXITED); + EXPECT_EQ(info.si_status, 0); + + // The method should not wait on unrelated processes. The unrelated process should not have + // exited, so `si_signo` should be 0. + ASSERT_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid_2, &info, WEXITED | WNOWAIT | WNOHANG)), 0); + EXPECT_EQ(info.si_signo, 0); +} + +TEST_F(ArtToolsEnsureNoProcessInDirTest, TimesOut) { + std::vector<std::string> args{related_dir_ + "/sleep", "5"}; + auto [pid, scope_guard] = ScopedExec(args, /*wait=*/false); + NanoSleep(100'000'000); // Wait for child processes to exec. + + Result<void> result = EnsureNoProcessInDir(related_dir_, /*timeout_ms=*/200, /*try_kill=*/false); + EXPECT_FALSE(result.ok()); + EXPECT_EQ(result.error().message(), "Some process(es) are still running after 200ms"); + + // The process should not have exited. + siginfo_t info; + ASSERT_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED | WNOWAIT | WNOHANG)), 0); + EXPECT_EQ(info.si_signo, 0); +} + +TEST_F(ArtToolsEnsureNoProcessInDirTest, KillsProcesses) { + std::vector<std::string> args_1{related_dir_ + "/sleep", "5"}; + auto [pid_1, scope_guard_1] = ScopedExec(args_1, /*wait=*/false); + std::vector<std::string> args_2{unrelated_dir_ + "/sleep", "5"}; + auto [pid_2, scope_guard_2] = ScopedExec(args_2, /*wait=*/false); + NanoSleep(100'000'000); // Wait for child processes to exec. + + ASSERT_RESULT_OK(EnsureNoProcessInDir(related_dir_, /*timeout_ms=*/200, /*try_kill=*/true)); + + // The related process should have been killed. + siginfo_t info; + ASSERT_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid_1, &info, WEXITED | WNOWAIT | WNOHANG)), 0); + EXPECT_EQ(info.si_signo, SIGCHLD); + EXPECT_EQ(info.si_code, CLD_KILLED); + EXPECT_EQ(info.si_status, SIGKILL); + + // The unrelated process should still be running. + ASSERT_EQ(TEMP_FAILURE_RETRY(waitid(P_PID, pid_2, &info, WEXITED | WNOWAIT | WNOHANG)), 0); + EXPECT_EQ(info.si_signo, 0); +} + } // namespace } // namespace tools } // namespace art diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc index 1914d506cd..17550d5aae 100644 --- a/runtime/exec_utils.cc +++ b/runtime/exec_utils.cc @@ -23,10 +23,6 @@ #include <sys/wait.h> #include <unistd.h> -#ifdef __BIONIC__ -#include <sys/pidfd.h> -#endif - #include <chrono> #include <climits> #include <condition_variable> @@ -47,6 +43,7 @@ #include "android-base/strings.h" #include "android-base/unique_fd.h" #include "base/macros.h" +#include "base/pidfd.h" #include "base/utils.h" #include "runtime.h" @@ -336,17 +333,7 @@ bool ExecUtils::Exec(const std::vector<std::string>& arg_vector, std::string* er return true; } -unique_fd ExecUtils::PidfdOpen(pid_t pid) const { -#ifdef __BIONIC__ - return unique_fd(pidfd_open(pid, /*flags=*/0)); -#else - // There is no glibc wrapper for pidfd_open. -#ifndef SYS_pidfd_open - constexpr int SYS_pidfd_open = 434; -#endif - return unique_fd(syscall(SYS_pidfd_open, pid, /*flags=*/0)); -#endif -} +unique_fd ExecUtils::PidfdOpen(pid_t pid) const { return art::PidfdOpen(pid, /*flags=*/0); } std::string ExecUtils::GetProcStat(pid_t pid) const { std::string stat_content; |