diff options
| author | 2022-02-25 20:47:48 +0000 | |
|---|---|---|
| committer | 2022-02-25 20:47:48 +0000 | |
| commit | a31e4a13fa78c9283a66ec2c183787fa12dfb125 (patch) | |
| tree | c9864f999b327c53e9e1b484f34e880179b3fa5a | |
| parent | 25969707e1e4bcd9a8b40645acc8f7341b98bb0d (diff) | |
| parent | 495142ac75cdb2595a61755d7ab42a748e651728 (diff) | |
Merge "Add a timeout for all installd operations." into tm-dev
| -rw-r--r-- | cmds/installd/dexopt.cpp | 112 | ||||
| -rw-r--r-- | cmds/installd/tests/installd_dexopt_test.cpp | 6 | ||||
| -rw-r--r-- | cmds/installd/tests/installd_utils_test.cpp | 42 | ||||
| -rw-r--r-- | cmds/installd/utils.cpp | 78 | ||||
| -rw-r--r-- | cmds/installd/utils.h | 7 | ||||
| -rw-r--r-- | cmds/installd/view_compiler.cpp | 20 | 
6 files changed, 198 insertions, 67 deletions
diff --git a/cmds/installd/dexopt.cpp b/cmds/installd/dexopt.cpp index c796da6c46..9647865e4f 100644 --- a/cmds/installd/dexopt.cpp +++ b/cmds/installd/dexopt.cpp @@ -74,10 +74,19 @@ using android::base::ReadFdToString;  using android::base::ReadFully;  using android::base::StringPrintf;  using android::base::WriteFully; +using android::base::borrowed_fd;  using android::base::unique_fd;  namespace { +// Timeout for short operations, such as merging profiles. +constexpr int kShortTimeoutMs = 60000; // 1 minute. + +// Timeout for long operations, such as compilation. This should be smaller than the Package Manager +// watchdog (PackageManagerService.WATCHDOG_TIMEOUT, 10 minutes), so that the operation will be +// aborted before that watchdog would take down the system server. +constexpr int kLongTimeoutMs = 570000; // 9.5 minutes. +  class DexOptStatus {   public:      // Check if dexopt is cancelled and fork if it is not cancelled. @@ -486,6 +495,25 @@ static void open_profile_files(uid_t uid, const std::string& package_name,      }  } +// Cleans up an output file specified by a file descriptor. This function should be called whenever +// a subprocess that modifies a system-managed file crashes. +// If the subprocess crashes while it's writing to the file, the file is likely corrupted, so we +// should remove it. +// If the subprocess times out and is killed while it's acquiring a flock on the file, there is +// probably a deadlock, so it's also good to remove the file so that later operations won't +// encounter the same problem. It's safe to do so because the process that is holding the flock will +// still have access to the file until the file descriptor is closed. +// Note that we can't do `clear_reference_profile` here even if the fd points to a reference profile +// because that also requires a flock and is therefore likely to be stuck in the second case. +static bool cleanup_output_fd(int fd) { +    std::string path; +    bool ret = remove_file_at_fd(fd, &path); +    if (ret) { +        LOG(INFO) << "Removed file at path " << path; +    } +    return ret; +} +  static constexpr int PROFMAN_BIN_RETURN_CODE_SUCCESS = 0;  static constexpr int PROFMAN_BIN_RETURN_CODE_COMPILE = 1;  static constexpr int PROFMAN_BIN_RETURN_CODE_SKIP_COMPILATION_NOT_ENOUGH_DELTA = 2; @@ -497,13 +525,14 @@ static constexpr int PROFMAN_BIN_RETURN_CODE_SKIP_COMPILATION_EMPTY_PROFILES = 7  class RunProfman : public ExecVHelper {    public: -   void SetupArgs(const std::vector<unique_fd>& profile_fds, -                  const unique_fd& reference_profile_fd, -                  const std::vector<unique_fd>& apk_fds, -                  const std::vector<std::string>& dex_locations, -                  bool copy_and_update, -                  bool for_snapshot, -                  bool for_boot_image) { +    template <typename T, typename U> +    void SetupArgs(const std::vector<T>& profile_fds, +                   const unique_fd& reference_profile_fd, +                   const std::vector<U>& apk_fds, +                   const std::vector<std::string>& dex_locations, +                   bool copy_and_update, +                   bool for_snapshot, +                   bool for_boot_image) {          // TODO(calin): Assume for now we run in the bg compile job (which is in          // most of the invocation). With the current data flow, is not very easy or @@ -519,11 +548,11 @@ class RunProfman : public ExecVHelper {              AddArg("--reference-profile-file-fd=" + std::to_string(reference_profile_fd.get()));          } -        for (const unique_fd& fd : profile_fds) { +        for (const T& fd : profile_fds) {              AddArg("--profile-file-fd=" + std::to_string(fd.get()));          } -        for (const unique_fd& fd : apk_fds) { +        for (const U& fd : apk_fds) {              AddArg("--apk-fd=" + std::to_string(fd.get()));          } @@ -582,20 +611,14 @@ class RunProfman : public ExecVHelper {                    for_boot_image);      } -    void SetupCopyAndUpdate(unique_fd&& profile_fd, -                            unique_fd&& reference_profile_fd, -                            unique_fd&& apk_fd, +    void SetupCopyAndUpdate(const unique_fd& profile_fd, +                            const unique_fd& reference_profile_fd, +                            const unique_fd& apk_fd,                              const std::string& dex_location) { -        // The fds need to stay open longer than the scope of the function, so put them into a local -        // variable vector. -        profiles_fd_.push_back(std::move(profile_fd)); -        apk_fds_.push_back(std::move(apk_fd)); -        reference_profile_fd_ = std::move(reference_profile_fd); -        std::vector<std::string> dex_locations = {dex_location}; -        SetupArgs(profiles_fd_, -                  reference_profile_fd_, -                  apk_fds_, -                  dex_locations, +        SetupArgs(std::vector<borrowed_fd>{profile_fd}, +                  reference_profile_fd, +                  std::vector<borrowed_fd>{apk_fd}, +                  {dex_location},                    /*copy_and_update=*/true,                    /*for_snapshot*/false,                    /*for_boot_image*/false); @@ -621,11 +644,6 @@ class RunProfman : public ExecVHelper {      void Exec() {          ExecVHelper::Exec(DexoptReturnCodes::kProfmanExec);      } - -  private: -    unique_fd reference_profile_fd_; -    std::vector<unique_fd> profiles_fd_; -    std::vector<unique_fd> apk_fds_;  };  static int analyze_profiles(uid_t uid, const std::string& package_name, @@ -657,13 +675,14 @@ static int analyze_profiles(uid_t uid, const std::string& package_name,          profman_merge.Exec();      }      /* parent */ -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      bool need_to_compile = false;      bool empty_profiles = false;      bool should_clear_current_profiles = false;      bool should_clear_reference_profile = false;      if (!WIFEXITED(return_code)) {          LOG(WARNING) << "profman failed for location " << location << ": " << return_code; +        cleanup_output_fd(reference_profile_fd.get());      } else {          return_code = WEXITSTATUS(return_code);          switch (return_code) { @@ -797,10 +816,10 @@ bool dump_profiles(int32_t uid, const std::string& pkgname, const std::string& p          profman_dump.Exec();      }      /* parent */ -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      if (!WIFEXITED(return_code)) { -        LOG(WARNING) << "profman failed for package " << pkgname << ": " -                << return_code; +        LOG(WARNING) << "profman failed for package " << pkgname << ": " << return_code; +        cleanup_output_fd(output_fd.get());          return false;      }      return true; @@ -871,7 +890,11 @@ bool copy_system_profile(const std::string& system_profile,          _exit(0);      }      /* parent */ -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs); +    if (!WIFEXITED(return_code)) { +        cleanup_output_fd(out_fd.get()); +        return false; +    }      return return_code == 0;  } @@ -1521,7 +1544,7 @@ static bool get_class_loader_context_dex_paths(const char* class_loader_context,      }      pipe_read.reset(); -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      if (!WIFEXITED(return_code)) {          PLOG(ERROR) << "Error waiting for child dexoptanalyzer process";          return false; @@ -1695,7 +1718,7 @@ static SecondaryDexOptProcessResult process_secondary_dex_dexopt(const std::stri      }      /* parent */ -    int result = wait_child(pid); +    int result = wait_child_with_timeout(pid, kShortTimeoutMs);      cancelled = dexopt_status_->check_if_killed_and_remove_dexopt_pid(pid);      if (!WIFEXITED(result)) {          if ((WTERMSIG(result) == SIGKILL) && cancelled) { @@ -1954,7 +1977,7 @@ int dexopt(const char* dex_path, uid_t uid, const char* pkgname, const char* ins          runner.Exec(DexoptReturnCodes::kDex2oatExec);      } else { -        int res = wait_child(pid); +        int res = wait_child_with_timeout(pid, kLongTimeoutMs);          bool cancelled = dexopt_status_->check_if_killed_and_remove_dexopt_pid(pid);          if (res == 0) {              LOG(VERBOSE) << "DexInv: --- END '" << dex_path << "' (success) ---"; @@ -2143,7 +2166,7 @@ bool reconcile_secondary_dex_file(const std::string& dex_path,          _exit(result ? kReconcileSecondaryDexCleanedUp : kReconcileSecondaryDexAccessIOError);      } -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      if (!WIFEXITED(return_code)) {          LOG(WARNING) << "reconcile dex failed for location " << dex_path << ": " << return_code;      } else { @@ -2261,7 +2284,7 @@ bool hash_secondary_dex_file(const std::string& dex_path, const std::string& pkg      if (!ReadFully(pipe_read, out_secondary_dex_hash->data(), out_secondary_dex_hash->size())) {          out_secondary_dex_hash->clear();      } -    return wait_child(pid) == 0; +    return wait_child_with_timeout(pid, kShortTimeoutMs) == 0;  }  // Helper for move_ab, so that we can have common failure-case cleanup. @@ -2591,9 +2614,10 @@ static bool create_app_profile_snapshot(int32_t app_id,      }      /* parent */ -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      if (!WIFEXITED(return_code)) {          LOG(WARNING) << "profman failed for " << package_name << ":" << profile_name; +        cleanup_output_fd(snapshot_fd.get());          return false;      } @@ -2700,10 +2724,11 @@ static bool create_boot_image_profile_snapshot(const std::string& package_name,          }          /* parent */ -        int return_code = wait_child(pid); +        int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);          if (!WIFEXITED(return_code)) {              PLOG(WARNING) << "profman failed for " << package_name << ":" << profile_name; +            cleanup_output_fd(snapshot_fd.get());              return false;          } @@ -2774,9 +2799,9 @@ bool prepare_app_profile(const std::string& package_name,      }      RunProfman args; -    args.SetupCopyAndUpdate(std::move(dex_metadata_fd), -                            std::move(ref_profile_fd), -                            std::move(apk_fd), +    args.SetupCopyAndUpdate(dex_metadata_fd, +                            ref_profile_fd, +                            apk_fd,                              code_path);      pid_t pid = fork();      if (pid == 0) { @@ -2789,9 +2814,10 @@ bool prepare_app_profile(const std::string& package_name,      }      /* parent */ -    int return_code = wait_child(pid); +    int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);      if (!WIFEXITED(return_code)) {          PLOG(WARNING) << "profman failed for " << package_name << ":" << profile_name; +        cleanup_output_fd(ref_profile_fd.get());          return false;      }      return true; diff --git a/cmds/installd/tests/installd_dexopt_test.cpp b/cmds/installd/tests/installd_dexopt_test.cpp index bb36c395e1..f21a304a3e 100644 --- a/cmds/installd/tests/installd_dexopt_test.cpp +++ b/cmds/installd/tests/installd_dexopt_test.cpp @@ -50,6 +50,8 @@ using android::base::unique_fd;  namespace android {  namespace installd { +constexpr int kTimeoutMs = 60000; +  // TODO(calin): try to dedup this code.  #if defined(__arm__)  static const std::string kRuntimeIsa = "arm"; @@ -1073,7 +1075,7 @@ class ProfileTest : public DexoptTest {              _exit(0);          }          /* parent */ -        ASSERT_TRUE(WIFEXITED(wait_child(pid))); +        ASSERT_TRUE(WIFEXITED(wait_child_with_timeout(pid, kTimeoutMs)));      }      void mergePackageProfiles(const std::string& package_name, @@ -1377,7 +1379,7 @@ class BootProfileTest : public ProfileTest {              _exit(0);          }          /* parent */ -        ASSERT_TRUE(WIFEXITED(wait_child(pid))); +        ASSERT_TRUE(WIFEXITED(wait_child_with_timeout(pid, kTimeoutMs)));      }    protected:      std::string intial_android_profiles_dir; diff --git a/cmds/installd/tests/installd_utils_test.cpp b/cmds/installd/tests/installd_utils_test.cpp index a7a8624ff6..8d1ccdc5d6 100644 --- a/cmds/installd/tests/installd_utils_test.cpp +++ b/cmds/installd/tests/installd_utils_test.cpp @@ -14,8 +14,10 @@   * limitations under the License.   */ +#include <errno.h>  #include <stdlib.h>  #include <string.h> +#include <unistd.h>  #include <android-base/logging.h>  #include <android-base/scopeguard.h> @@ -690,5 +692,45 @@ TEST_F(UtilsTest, TestSupplementalDataPaths) {                create_data_misc_supplemental_shared_path(nullptr, false, 10, "com.foo"));  } +TEST_F(UtilsTest, WaitChild) { +    pid_t pid = fork(); +    if (pid == 0) { +        /* child */ +        // Do nothing. +        _exit(0); +    } +    /* parent */ +    int return_code = wait_child_with_timeout(pid, /*timeout_ms=*/100); +    EXPECT_TRUE(WIFEXITED(return_code)); +    EXPECT_EQ(WEXITSTATUS(return_code), 0); +} + +TEST_F(UtilsTest, WaitChildTimeout) { +    pid_t pid = fork(); +    if (pid == 0) { +        /* child */ +        sleep(1); +        _exit(0); +    } +    /* parent */ +    int return_code = wait_child_with_timeout(pid, /*timeout_ms=*/1); +    EXPECT_FALSE(WIFEXITED(return_code)); +    EXPECT_EQ(WTERMSIG(return_code), SIGKILL); +} + +TEST_F(UtilsTest, RemoveFileAtFd) { +    std::string filename = "/data/local/tmp/tempfile-XXXXXX"; +    int fd = mkstemp(filename.data()); +    ASSERT_GE(fd, 0); +    ASSERT_EQ(access(filename.c_str(), F_OK), 0); + +    std::string actual_filename; +    remove_file_at_fd(fd, &actual_filename); +    EXPECT_NE(access(filename.c_str(), F_OK), 0); +    EXPECT_EQ(filename, actual_filename); + +    close(fd); +} +  }  // namespace installd  }  // namespace android diff --git a/cmds/installd/utils.cpp b/cmds/installd/utils.cpp index 3ce4b67d9a..6650b761e1 100644 --- a/cmds/installd/utils.cpp +++ b/cmds/installd/utils.cpp @@ -19,18 +19,21 @@  #include <errno.h>  #include <fcntl.h>  #include <fts.h> +#include <poll.h>  #include <stdlib.h>  #include <sys/capability.h> +#include <sys/pidfd.h>  #include <sys/stat.h>  #include <sys/statvfs.h>  #include <sys/wait.h>  #include <sys/xattr.h> +#include <unistd.h>  #include <uuid/uuid.h>  #include <android-base/file.h>  #include <android-base/logging.h> -#include <android-base/strings.h>  #include <android-base/stringprintf.h> +#include <android-base/strings.h>  #include <android-base/unique_fd.h>  #include <cutils/fs.h>  #include <cutils/properties.h> @@ -1096,30 +1099,45 @@ int ensure_config_user_dirs(userid_t userid) {      return fs_prepare_dir(path.c_str(), 0750, uid, gid);  } -int wait_child(pid_t pid) -{ +static int wait_child(pid_t pid) {      int status; -    pid_t got_pid; +    pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, /*options=*/0)); -    while (1) { -        got_pid = waitpid(pid, &status, 0); -        if (got_pid == -1 && errno == EINTR) { -            printf("waitpid interrupted, retrying\n"); -        } else { -            break; -        } -    }      if (got_pid != pid) { -        ALOGW("waitpid failed: wanted %d, got %d: %s\n", -            (int) pid, (int) got_pid, strerror(errno)); -        return 1; +        PLOG(ERROR) << "waitpid failed: wanted " << pid << ", got " << got_pid; +        return W_EXITCODE(/*exit_code=*/255, /*signal_number=*/0);      } -    if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { -        return 0; -    } else { -        return status;      /* always nonzero */ +    return status; +} + +int wait_child_with_timeout(pid_t pid, int timeout_ms) { +    int pidfd = pidfd_open(pid, /*flags=*/0); +    if (pidfd < 0) { +        PLOG(ERROR) << "pidfd_open failed for pid " << pid; +        kill(pid, SIGKILL); +        return wait_child(pid); +    } + +    struct pollfd pfd; +    pfd.fd = pidfd; +    pfd.events = POLLIN; +    int poll_ret = TEMP_FAILURE_RETRY(poll(&pfd, /*nfds=*/1, timeout_ms)); + +    close(pidfd); + +    if (poll_ret < 0) { +        PLOG(ERROR) << "poll failed for pid " << pid; +        kill(pid, SIGKILL); +        return wait_child(pid);      } +    if (poll_ret == 0) { +        LOG(WARNING) << "Child process " << pid << " timed out after " << timeout_ms +                     << "ms. Killing it"; +        kill(pid, SIGKILL); +        return wait_child(pid); +    } +    return wait_child(pid);  }  /** @@ -1332,5 +1350,27 @@ void drop_capabilities(uid_t uid) {      }  } +bool remove_file_at_fd(int fd, /*out*/ std::string* path) { +    char path_buffer[PATH_MAX + 1]; +    std::string proc_path = android::base::StringPrintf("/proc/self/fd/%d", fd); +    ssize_t len = readlink(proc_path.c_str(), path_buffer, PATH_MAX); +    if (len < 0) { +        PLOG(WARNING) << "Could not remove file at fd " << fd << ": Failed to get file path"; +        return false; +    } +    path_buffer[len] = '\0'; +    if (path != nullptr) { +        *path = path_buffer; +    } +    if (unlink(path_buffer) != 0) { +        if (errno == ENOENT) { +            return true; +        } +        PLOG(WARNING) << "Could not remove file at path " << path_buffer; +        return false; +    } +    return true; +} +  }  // namespace installd  }  // namespace android diff --git a/cmds/installd/utils.h b/cmds/installd/utils.h index 2db1623cbe..2d00845c54 100644 --- a/cmds/installd/utils.h +++ b/cmds/installd/utils.h @@ -160,7 +160,8 @@ int validate_apk_path_subdirs(const char *path);  int ensure_config_user_dirs(userid_t userid); -int wait_child(pid_t pid); +// Waits for a child process, or kills it if it times out. Returns the exit code. +int wait_child_with_timeout(pid_t pid, int timeout_ms);  int prepare_app_cache_dir(const std::string& parent, const char* name, mode_t target_mode,          uid_t uid, gid_t gid); @@ -177,6 +178,10 @@ bool collect_profiles(std::vector<std::string>* profiles_paths);  void drop_capabilities(uid_t uid); +// Removes a file specified by a file descriptor. Returns true on success. Reports the file path to +// `path` if present. +bool remove_file_at_fd(int fd, /*out*/ std::string* path = nullptr); +  }  // namespace installd  }  // namespace android diff --git a/cmds/installd/view_compiler.cpp b/cmds/installd/view_compiler.cpp index 60d6492e70..8c000a11c9 100644 --- a/cmds/installd/view_compiler.cpp +++ b/cmds/installd/view_compiler.cpp @@ -33,7 +33,13 @@  namespace android {  namespace installd { -using base::unique_fd; +namespace { + +using ::android::base::unique_fd; + +constexpr int kTimeoutMs = 300000; + +} // namespace  bool view_compiler(const char* apk_path, const char* package_name, const char* out_dex_file,                     int uid) { @@ -88,7 +94,17 @@ bool view_compiler(const char* apk_path, const char* package_name, const char* o          _exit(1);      } -    return wait_child(pid) == 0; +    int return_code = wait_child_with_timeout(pid, kTimeoutMs); +    if (!WIFEXITED(return_code)) { +        LOG(WARNING) << "viewcompiler failed for " << package_name << ":" << apk_path; +        if (WTERMSIG(return_code) == SIGKILL) { +            // If the subprocess is killed while it's writing to the file, the file is likely +            // corrupted, so we should remove it. +            remove_file_at_fd(outfd.get()); +        } +        return false; +    } +    return WEXITSTATUS(return_code) == 0;  }  } // namespace installd  |