Fix ExecUtils.WaitChildWithTimeoutFallback by adding a fallback.

`WaitChildWithTimeout` didn't work with old Linux kernels because it
uses a syscall `pidfd_open`, which was added in Linux 5.4.

This change adds a fallback implementation of `WaitChildWithTimeout`
that creates a thread to wait instead of relying on `pidfd_open`.

Also:
- Use android::base::unique_fd to hold pidfd so that the fd is
  guaranteed to be closed.
- Allow timeout_sec to be negative, in which case it blocks until the
  subprocess exits.

Bug: 229268202
Test: m test-art-host-gtest-art_runtime_tests
Change-Id: Iec3f21db135d9a8a3a915372253f1ff3e285e5cf
Merged-In: Iec3f21db135d9a8a3a915372253f1ff3e285e5cf
(cherry picked from commit 99bd8dd2bf8eac1a60bf65e442462753ed7f2147)
diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc
index 6e2a5b4..722f70a 100644
--- a/runtime/exec_utils.cc
+++ b/runtime/exec_utils.cc
@@ -21,19 +21,24 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
-#include "base/macros.h"
-
 #ifdef __BIONIC__
 #include <sys/pidfd.h>
 #endif
 
+#include <chrono>
+#include <climits>
+#include <condition_variable>
 #include <cstdint>
+#include <mutex>
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "android-base/scopeguard.h"
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
+#include "android-base/unique_fd.h"
+#include "base/macros.h"
 #include "runtime.h"
 
 namespace art {
@@ -41,6 +46,7 @@
 namespace {
 
 using ::android::base::StringPrintf;
+using ::android::base::unique_fd;
 
 std::string ToCommandLine(const std::vector<std::string>& args) {
   return android::base::Join(args, ' ');
@@ -88,31 +94,86 @@
   }
 }
 
-int WaitChild(pid_t pid, const std::vector<std::string>& arg_vector, std::string* error_msg) {
-  int status = -1;
-  pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
-  if (got_pid != pid) {
-    *error_msg =
-        StringPrintf("Failed to execute (%s) because waitpid failed: wanted %d, got %d: %s",
-                     ToCommandLine(arg_vector).c_str(),
-                     pid,
-                     got_pid,
-                     strerror(errno));
+int WaitChild(pid_t pid,
+              const std::vector<std::string>& arg_vector,
+              bool no_wait,
+              std::string* error_msg) {
+  siginfo_t info;
+  // WNOWAIT leaves the child in a waitable state. The call is still blocking.
+  int options = WEXITED | (no_wait ? WNOWAIT : 0);
+  if (TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, options)) != 0) {
+    *error_msg = StringPrintf("Failed to execute (%s) because waitid failed for pid %d: %s",
+                              ToCommandLine(arg_vector).c_str(),
+                              pid,
+                              strerror(errno));
     return -1;
   }
-  if (!WIFEXITED(status)) {
+  if (info.si_pid != pid) {
+    *error_msg = StringPrintf("Failed to execute (%s) because waitid failed: wanted %d, got %d: %s",
+                              ToCommandLine(arg_vector).c_str(),
+                              pid,
+                              info.si_pid,
+                              strerror(errno));
+    return -1;
+  }
+  if (info.si_code != CLD_EXITED) {
     *error_msg =
         StringPrintf("Failed to execute (%s) because the child process is terminated by signal %d",
                      ToCommandLine(arg_vector).c_str(),
-                     WTERMSIG(status));
+                     info.si_status);
     return -1;
   }
-  return WEXITSTATUS(status);
+  return info.si_status;
+}
+
+int WaitChild(pid_t pid, const std::vector<std::string>& arg_vector, std::string* error_msg) {
+  return WaitChild(pid, arg_vector, /*no_wait=*/false, error_msg);
+}
+
+// A fallback implementation of `WaitChildWithTimeout` that creates a thread to wait instead of
+// relying on `pidfd_open`.
+int WaitChildWithTimeoutFallback(pid_t pid,
+                                 const std::vector<std::string>& arg_vector,
+                                 int timeout_ms,
+                                 bool* timed_out,
+                                 std::string* error_msg) {
+  bool child_exited = false;
+  std::condition_variable cv;
+  std::mutex m;
+
+  std::thread wait_thread([&]() {
+    std::unique_lock<std::mutex> lock(m);
+    if (!cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&] { return child_exited; })) {
+      *timed_out = true;
+      *error_msg =
+          StringPrintf("Child process %d timed out after %dms. Killing it", pid, timeout_ms);
+      kill(pid, SIGKILL);
+    }
+  });
+
+  // Leave the child in a waitable state just in case `wait_thread` sends a `SIGKILL` after the
+  // child exits.
+  std::string ignored_error_msg;
+  WaitChild(pid, arg_vector, /*no_wait=*/true, &ignored_error_msg);
+
+  {
+    std::unique_lock<std::mutex> lock(m);
+    child_exited = true;
+  }
+  cv.notify_all();
+  wait_thread.join();
+
+  if (*timed_out) {
+    WaitChild(pid, arg_vector, &ignored_error_msg);
+    return -1;
+  }
+  return WaitChild(pid, arg_vector, error_msg);
 }
 
 int WaitChildWithTimeout(pid_t pid,
+                         unique_fd pidfd,
                          const std::vector<std::string>& arg_vector,
-                         int timeout_sec,
+                         int timeout_ms,
                          bool* timed_out,
                          std::string* error_msg) {
   auto cleanup = android::base::make_scope_guard([&]() {
@@ -121,24 +182,12 @@
     WaitChild(pid, arg_vector, &ignored_error_msg);
   });
 
-#ifdef __BIONIC__
-  int pidfd = pidfd_open(pid, /*flags=*/0);
-#else
-  // There is no glibc wrapper for pidfd_open.
-  constexpr int SYS_pidfd_open = 434;
-  int pidfd = syscall(SYS_pidfd_open, pid, /*flags=*/0);
-#endif
-  if (pidfd < 0) {
-    *error_msg = StringPrintf("pidfd_open failed for pid %d: %s", pid, strerror(errno));
-    return -1;
-  }
-
   struct pollfd pfd;
-  pfd.fd = pidfd;
+  pfd.fd = pidfd.get();
   pfd.events = POLLIN;
-  int poll_ret = TEMP_FAILURE_RETRY(poll(&pfd, /*nfds=*/1, timeout_sec * 1000));
+  int poll_ret = TEMP_FAILURE_RETRY(poll(&pfd, /*nfds=*/1, timeout_ms));
 
-  close(pidfd);
+  pidfd.reset();
 
   if (poll_ret < 0) {
     *error_msg = StringPrintf("poll failed for pid %d: %s", pid, strerror(errno));
@@ -146,7 +195,7 @@
   }
   if (poll_ret == 0) {
     *timed_out = true;
-    *error_msg = StringPrintf("Child process %d timed out after %ds. Killing it", pid, timeout_sec);
+    *error_msg = StringPrintf("Child process %d timed out after %dms. Killing it", pid, timeout_ms);
     return -1;
   }
 
@@ -156,23 +205,23 @@
 
 }  // namespace
 
-int ExecAndReturnCode(const std::vector<std::string>& arg_vector, std::string* error_msg) {
-  // Start subprocess.
-  pid_t pid = ExecWithoutWait(arg_vector, error_msg);
-  if (pid == -1) {
-    return -1;
-  }
-
-  // Wait for subprocess to finish.
-  return WaitChild(pid, arg_vector, error_msg);
+int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector,
+                                 std::string* error_msg) const {
+  bool ignored_timed_out;
+  return ExecAndReturnCode(arg_vector, /*timeout_sec=*/-1, &ignored_timed_out, error_msg);
 }
 
-int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
-                      int timeout_sec,
-                      bool* timed_out,
-                      std::string* error_msg) {
+int ExecUtils::ExecAndReturnCode(const std::vector<std::string>& arg_vector,
+                                 int timeout_sec,
+                                 bool* timed_out,
+                                 std::string* error_msg) const {
   *timed_out = false;
 
+  if (timeout_sec > INT_MAX / 1000) {
+    *error_msg = "Timeout too large";
+    return -1;
+  }
+
   // Start subprocess.
   pid_t pid = ExecWithoutWait(arg_vector, error_msg);
   if (pid == -1) {
@@ -180,10 +229,23 @@
   }
 
   // Wait for subprocess to finish.
-  return WaitChildWithTimeout(pid, arg_vector, timeout_sec, timed_out, error_msg);
+  if (timeout_sec >= 0) {
+    unique_fd pidfd = PidfdOpen(pid);
+    if (pidfd.get() >= 0) {
+      return WaitChildWithTimeout(
+          pid, std::move(pidfd), arg_vector, timeout_sec * 1000, timed_out, error_msg);
+    } else {
+      LOG(DEBUG) << StringPrintf(
+          "pidfd_open failed for pid %d: %s, falling back", pid, strerror(errno));
+      return WaitChildWithTimeoutFallback(
+          pid, arg_vector, timeout_sec * 1000, timed_out, error_msg);
+    }
+  } else {
+    return WaitChild(pid, arg_vector, error_msg);
+  }
 }
 
-bool Exec(const std::vector<std::string>& arg_vector, std::string* error_msg) {
+bool ExecUtils::Exec(const std::vector<std::string>& arg_vector, std::string* error_msg) const {
   int status = ExecAndReturnCode(arg_vector, error_msg);
   if (status < 0) {
     // Internal error. The error message is already set.
@@ -198,4 +260,16 @@
   return true;
 }
 
+unique_fd ExecUtils::PidfdOpen(pid_t pid) const {
+#ifdef __BIONIC__
+  return unique_fd(pidfd_open(pid, /*flags=*/0));
+#else
+  // There is no glibc wrapper for pidfd_open.
+#ifndef SYS_pidfd_open
+  constexpr int SYS_pidfd_open = 434;
+#endif
+  return unique_fd(syscall(SYS_pidfd_open, pid, /*flags=*/0));
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/exec_utils.h b/runtime/exec_utils.h
index ff90ebd..79a12d7 100644
--- a/runtime/exec_utils.h
+++ b/runtime/exec_utils.h
@@ -22,46 +22,46 @@
 #include <string>
 #include <vector>
 
+#include "android-base/unique_fd.h"
+
 namespace art {
 
 // Wrapper on fork/execv to run a command in a subprocess.
 // These spawn child processes using the environment as it was set when the single instance
 // of the runtime (Runtime::Current()) was started.  If no instance of the runtime was started, it
 // will use the current environment settings.
-
-bool Exec(const std::vector<std::string>& arg_vector, /*out*/ std::string* error_msg);
-int ExecAndReturnCode(const std::vector<std::string>& arg_vector, /*out*/ std::string* error_msg);
-
-// Execute the command specified in `argv_vector` in a subprocess with a timeout.
-// Returns the process exit code on success, -1 otherwise.
-int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
-                      int timeout_sec,
-                      /*out*/ bool* timed_out,
-                      /*out*/ std::string* error_msg);
-
-// A wrapper class to make the functions above mockable.
 class ExecUtils {
  public:
   virtual ~ExecUtils() = default;
 
   virtual bool Exec(const std::vector<std::string>& arg_vector,
-                    /*out*/ std::string* error_msg) const {
-    return art::Exec(arg_vector, error_msg);
-  }
+                    /*out*/ std::string* error_msg) const;
 
   virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
-                                /*out*/ std::string* error_msg) const {
-    return art::ExecAndReturnCode(arg_vector, error_msg);
-  }
+                                /*out*/ std::string* error_msg) const;
 
+  // Executes the command specified in `arg_vector` in a subprocess with a timeout.
+  // If `timeout_sec` is negative, blocks until the subprocess exits.
+  // Returns the process exit code on success, -1 otherwise.
+  // Sets `timed_out` to true if the process times out, or false otherwise.
   virtual int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
                                 int timeout_sec,
                                 /*out*/ bool* timed_out,
-                                /*out*/ std::string* error_msg) const {
-    return art::ExecAndReturnCode(arg_vector, timeout_sec, timed_out, error_msg);
-  }
+                                /*out*/ std::string* error_msg) const;
+
+ protected:
+  virtual android::base::unique_fd PidfdOpen(pid_t pid) const;
 };
 
+inline bool Exec(const std::vector<std::string>& arg_vector, /*out*/ std::string* error_msg) {
+  return ExecUtils().Exec(arg_vector, error_msg);
+}
+
+inline int ExecAndReturnCode(const std::vector<std::string>& arg_vector,
+                             /*out*/ std::string* error_msg) {
+  return ExecUtils().ExecAndReturnCode(arg_vector, error_msg);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_EXEC_UTILS_H_
diff --git a/runtime/exec_utils_test.cc b/runtime/exec_utils_test.cc
index aa53739..c10b84e 100644
--- a/runtime/exec_utils_test.cc
+++ b/runtime/exec_utils_test.cc
@@ -16,13 +16,18 @@
 
 #include "exec_utils.h"
 
-#include <unistd.h>
+#include <sys/utsname.h>
 
+#include <cstring>
+#include <filesystem>
+#include <memory>
+#include <tuple>
+
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
 #include "base/file_utils.h"
 #include "base/memory_tool.h"
 #include "common_runtime_test.h"
-#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -30,69 +35,95 @@
 std::string PrettyArguments(const char* signature);
 std::string PrettyReturnType(const char* signature);
 
-bool IsPidfdSupported() {
-#ifdef __BIONIC__
-  return true;
-#else
-  constexpr int SYS_pidfd_open = 434;
-  int pidfd = syscall(SYS_pidfd_open, getpid(), /*flags=*/0);
-  if (pidfd < 0) {
-    return false;
-  }
-  close(pidfd);
-  return true;
-#endif
-}
-
-class ExecUtilsTest : public CommonRuntimeTest {};
-
-TEST_F(ExecUtilsTest, ExecSuccess) {
-  std::vector<std::string> command;
+std::string GetBin(const std::string& name) {
   if (kIsTargetBuild) {
     std::string android_root(GetAndroidRoot());
-    command.push_back(android_root + "/bin/id");
+    return android_root + "/bin/" + name;
+  } else if (std::filesystem::exists("/usr/bin/" + name)) {
+    return "/usr/bin/" + name;
   } else {
-    command.push_back("/usr/bin/id");
+    return "/bin/" + name;
   }
+}
+
+std::tuple<int, int> GetKernelVersion() {
+  std::tuple<int, int> version;
+  utsname uts;
+  CHECK_EQ(uname(&uts), 0);
+  CHECK_EQ(sscanf(uts.release, "%d.%d", &std::get<0>(version), &std::get<1>(version)), 2);
+  return version;
+}
+
+class AlwaysFallbackExecUtils : public ExecUtils {
+ protected:
+  android::base::unique_fd PidfdOpen(pid_t) const override { return android::base::unique_fd(-1); }
+};
+
+class NeverFallbackExecUtils : public ExecUtils {
+ protected:
+  android::base::unique_fd PidfdOpen(pid_t pid) const override {
+    android::base::unique_fd pidfd = ExecUtils::PidfdOpen(pid);
+    CHECK_GE(pidfd.get(), 0) << strerror(errno);
+    return pidfd;
+  }
+};
+
+class ExecUtilsTest : public CommonRuntimeTest, public testing::WithParamInterface<bool> {
+ protected:
+  void SetUp() override {
+    CommonRuntimeTest::SetUp();
+    bool always_fallback = GetParam();
+    if (always_fallback) {
+      exec_utils_ = std::make_unique<AlwaysFallbackExecUtils>();
+    } else {
+      if (GetKernelVersion() >= std::make_tuple(5, 4)) {
+        exec_utils_ = std::make_unique<NeverFallbackExecUtils>();
+      } else {
+        GTEST_SKIP() << "Kernel version older than 5.4";
+      }
+    }
+  }
+
+  std::unique_ptr<ExecUtils> exec_utils_;
+};
+
+TEST_P(ExecUtilsTest, ExecSuccess) {
+  std::vector<std::string> command;
+  command.push_back(GetBin("id"));
   std::string error_msg;
   // Historical note: Running on Valgrind failed due to some memory
   // that leaks in thread alternate signal stacks.
-  EXPECT_TRUE(Exec(command, &error_msg));
+  EXPECT_TRUE(exec_utils_->Exec(command, &error_msg));
   EXPECT_EQ(0U, error_msg.size()) << error_msg;
 }
 
-TEST_F(ExecUtilsTest, ExecError) {
+TEST_P(ExecUtilsTest, ExecError) {
   std::vector<std::string> command;
   command.push_back("bogus");
   std::string error_msg;
   // Historical note: Running on Valgrind failed due to some memory
   // that leaks in thread alternate signal stacks.
-  EXPECT_FALSE(Exec(command, &error_msg));
+  EXPECT_FALSE(exec_utils_->Exec(command, &error_msg));
   EXPECT_FALSE(error_msg.empty());
 }
 
-TEST_F(ExecUtilsTest, EnvSnapshotAdditionsAreNotVisible) {
+TEST_P(ExecUtilsTest, EnvSnapshotAdditionsAreNotVisible) {
   static constexpr const char* kModifiedVariable = "EXEC_SHOULD_NOT_EXPORT_THIS";
   static constexpr int kOverwrite = 1;
   // Set an variable in the current environment.
   EXPECT_EQ(setenv(kModifiedVariable, "NEVER", kOverwrite), 0);
   // Test that it is not exported.
   std::vector<std::string> command;
-  if (kIsTargetBuild) {
-    std::string android_root(GetAndroidRoot());
-    command.push_back(android_root + "/bin/printenv");
-  } else {
-    command.push_back("/usr/bin/printenv");
-  }
+  command.push_back(GetBin("printenv"));
   command.push_back(kModifiedVariable);
   std::string error_msg;
   // Historical note: Running on Valgrind failed due to some memory
   // that leaks in thread alternate signal stacks.
-  EXPECT_FALSE(Exec(command, &error_msg));
+  EXPECT_FALSE(exec_utils_->Exec(command, &error_msg));
   EXPECT_NE(0U, error_msg.size()) << error_msg;
 }
 
-TEST_F(ExecUtilsTest, EnvSnapshotDeletionsAreNotVisible) {
+TEST_P(ExecUtilsTest, EnvSnapshotDeletionsAreNotVisible) {
   static constexpr const char* kDeletedVariable = "PATH";
   static constexpr int kOverwrite = 1;
   // Save the variable's value.
@@ -102,17 +133,12 @@
   EXPECT_EQ(unsetenv(kDeletedVariable), 0);
   // Test that it is not exported.
   std::vector<std::string> command;
-  if (kIsTargetBuild) {
-    std::string android_root(GetAndroidRoot());
-    command.push_back(android_root + "/bin/printenv");
-  } else {
-    command.push_back("/usr/bin/printenv");
-  }
+  command.push_back(GetBin("printenv"));
   command.push_back(kDeletedVariable);
   std::string error_msg;
   // Historical note: Running on Valgrind failed due to some memory
   // that leaks in thread alternate signal stacks.
-  EXPECT_TRUE(Exec(command, &error_msg));
+  EXPECT_TRUE(exec_utils_->Exec(command, &error_msg));
   EXPECT_EQ(0U, error_msg.size()) << error_msg;
   // Restore the variable's value.
   EXPECT_EQ(setenv(kDeletedVariable, save_value, kOverwrite), 0);
@@ -120,52 +146,32 @@
 
 static std::vector<std::string> SleepCommand(int sleep_seconds) {
   std::vector<std::string> command;
-  if (kIsTargetBuild) {
-    command.push_back(GetAndroidRoot() + "/bin/sleep");
-  } else {
-    command.push_back("/bin/sleep");
-  }
+  command.push_back(GetBin("sleep"));
   command.push_back(android::base::StringPrintf("%d", sleep_seconds));
   return command;
 }
 
-TEST_F(ExecUtilsTest, ExecTimeout) {
-  if (!IsPidfdSupported()) {
-    GTEST_SKIP() << "pidfd not supported";
-  }
-
+TEST_P(ExecUtilsTest, ExecTimeout) {
   static constexpr int kSleepSeconds = 5;
   static constexpr int kWaitSeconds = 1;
   std::vector<std::string> command = SleepCommand(kSleepSeconds);
   std::string error_msg;
   bool timed_out;
-  ASSERT_EQ(ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), -1);
-  EXPECT_TRUE(timed_out);
+  ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), -1);
+  EXPECT_TRUE(timed_out) << error_msg;
 }
 
-TEST_F(ExecUtilsTest, ExecNoTimeout) {
-  if (!IsPidfdSupported()) {
-    GTEST_SKIP() << "pidfd not supported";
-  }
-
+TEST_P(ExecUtilsTest, ExecNoTimeout) {
   static constexpr int kSleepSeconds = 1;
   static constexpr int kWaitSeconds = 5;
   std::vector<std::string> command = SleepCommand(kSleepSeconds);
   std::string error_msg;
   bool timed_out;
-  ASSERT_EQ(ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), 0);
+  ASSERT_EQ(exec_utils_->ExecAndReturnCode(command, kWaitSeconds, &timed_out, &error_msg), 0)
+      << error_msg;
   EXPECT_FALSE(timed_out);
 }
 
-TEST_F(ExecUtilsTest, ExecTimeoutNotSupported) {
-  if (IsPidfdSupported()) {
-    GTEST_SKIP() << "pidfd supported";
-  }
-
-  std::string error_msg;
-  bool timed_out;
-  ASSERT_EQ(ExecAndReturnCode({"command"}, /*timeout_sec=*/0, &timed_out, &error_msg), -1);
-  EXPECT_THAT(error_msg, testing::HasSubstr("pidfd_open failed for pid"));
-}
+INSTANTIATE_TEST_SUITE_P(AlwaysOrNeverFallback, ExecUtilsTest, testing::Values(true, false));
 
 }  // namespace art