Add a function to list all files managed by ART Service.

This is going to be used in the "sweep" phase of the GC.

Bug: 254013425
Test: m test-art-host-gtest-art_libarttools_tests
Test: m test-art-host-gtest-art_artd_tests
Test: atest art_standalone_libarttools_tests
Test: atest art_standalone_artd_tests
Ignore-AOSP-First: ART Services.
Change-Id: Ie7a5bd6f805c370aa3c2e3a1ab1d5408e4552f83
diff --git a/artd/path_utils.cc b/artd/path_utils.cc
index 295b023..cac9444 100644
--- a/artd/path_utils.cc
+++ b/artd/path_utils.cc
@@ -17,6 +17,8 @@
 #include "path_utils.h"
 
 #include <filesystem>
+#include <string>
+#include <vector>
 
 #include "aidl/com/android/server/art/BnArtd.h"
 #include "android-base/errors.h"
@@ -27,6 +29,7 @@
 #include "file_utils.h"
 #include "fmt/format.h"
 #include "oat_file_assistant.h"
+#include "tools/tools.h"
 
 namespace art {
 namespace artd {
@@ -99,6 +102,15 @@
   return result;
 }
 
+Result<std::string> GetAndroidExpandOrError() {
+  std::string failure;  // A non-empty value after the call is treated as failure.
+  std::string expand_dir = GetAndroidExpandSafe(&failure);
+  if (failure.empty()) {
+    return expand_dir;
+  }
+  return Error() << failure;
+}
+
 Result<std::string> GetArtRootOrError() {
   std::string error_msg;
   std::string result = GetArtRootSafe(&error_msg);
@@ -110,6 +122,37 @@
 
 }  // namespace
 
+Result<std::vector<std::string>> ListManagedFiles() {
+  std::string android_data = OR_RETURN(GetAndroidDataOrError());
+  std::string android_expand = OR_RETURN(GetAndroidExpandOrError());
+
+  // Glob patterns covering the managed files. See `art::tools::Glob` for the syntax.
+  std::vector<std::string> patterns = {
+      // Profiles for primary dex files.
+      android_data + "/misc/profiles/**",
+      // Artifacts for primary dex files.
+      android_data + "/dalvik-cache/**",
+  };
+
+  for (const std::string& data_root : {android_data, android_expand + "/*"}) {
+    // Artifacts for primary dex files, under the data dir and under each dir in the expand dir.
+    patterns.push_back(data_root + "/app/*/*/oat/**");
+    // Profiles and artifacts for secondary dex files. Those files are in app data directories, so
+    // we use more granular patterns to avoid accidentally deleting apps' files.
+    for (const char* user_dir : {"/user", "/user_de"}) {
+      std::string secondary_oat_dir = data_root + user_dir + "/*/*/**/oat";
+      for (const char* maybe_tmp_suffix : {"", ".*.tmp"}) {  // The file and its ".*.tmp" forms.
+        patterns.push_back(secondary_oat_dir + "/*.prof" + maybe_tmp_suffix);
+        patterns.push_back(secondary_oat_dir + "/*/*.odex" + maybe_tmp_suffix);
+        patterns.push_back(secondary_oat_dir + "/*/*.vdex" + maybe_tmp_suffix);
+        patterns.push_back(secondary_oat_dir + "/*/*.art" + maybe_tmp_suffix);
+      }
+    }
+  }
+
+  return tools::Glob(patterns);
+}
+
 Result<void> ValidateDexPath(const std::string& dex_path) {
   OR_RETURN(ValidateAbsoluteNormalPath(dex_path));
   if (!EndsWith(dex_path, ".apk") && !EndsWith(dex_path, ".jar")) {
diff --git a/artd/path_utils.h b/artd/path_utils.h
index 0cc017e..1063f91 100644
--- a/artd/path_utils.h
+++ b/artd/path_utils.h
@@ -17,6 +17,9 @@
 #ifndef ART_ARTD_PATH_UTILS_H_
 #define ART_ARTD_PATH_UTILS_H_
 
+#include <string>
+#include <vector>
+
 #include "aidl/com/android/server/art/BnArtd.h"
 #include "android-base/result.h"
 #include "base/file_utils.h"
@@ -24,6 +27,9 @@
 namespace art {
 namespace artd {
 
+// Returns all existing files that are managed by artd.
+android::base::Result<std::vector<std::string>> ListManagedFiles();
+
 android::base::Result<void> ValidateDexPath(const std::string& dex_path);
 
 android::base::Result<std::string> BuildArtBinPath(const std::string& binary_name);
diff --git a/libartbase/base/file_utils.cc b/libartbase/base/file_utils.cc
index 2396289..d32a54e 100644
--- a/libartbase/base/file_utils.cc
+++ b/libartbase/base/file_utils.cc
@@ -73,6 +73,8 @@
 static constexpr const char* kAndroidSystemExtRootDefaultPath = "/system_ext";
 static constexpr const char* kAndroidDataEnvVar = "ANDROID_DATA";
 static constexpr const char* kAndroidDataDefaultPath = "/data";
+static constexpr const char* kAndroidExpandEnvVar = "ANDROID_EXPAND";
+static constexpr const char* kAndroidExpandDefaultPath = "/mnt/expand";
 static constexpr const char* kAndroidArtRootEnvVar = "ANDROID_ART_ROOT";
 static constexpr const char* kAndroidConscryptRootEnvVar = "ANDROID_CONSCRYPT_ROOT";
 static constexpr const char* kAndroidI18nRootEnvVar = "ANDROID_I18N_ROOT";
@@ -282,6 +284,18 @@
 
 std::string GetAndroidData() { return GetAndroidDir(kAndroidDataEnvVar, kAndroidDataDefaultPath); }
 
+std::string GetAndroidExpandSafe(std::string* error_msg) {
+  // Look up $ANDROID_EXPAND (default: /mnt/expand); the directory is required to exist.
+  const char* expand_dir = GetAndroidDirSafe(
+      kAndroidExpandEnvVar, kAndroidExpandDefaultPath, /*must_exist=*/true, error_msg);
+  // A null lookup result is reported to the caller as an empty string.
+  return (expand_dir == nullptr) ? "" : expand_dir;
+}
+
+std::string GetAndroidExpand() {  // Aborting counterpart of `GetAndroidExpandSafe`.
+  return GetAndroidDir(kAndroidExpandEnvVar, kAndroidExpandDefaultPath);
+}
+
 std::string GetArtApexData() {
   return GetAndroidDir(kArtApexDataEnvVar, kArtApexDataDefaultPath, /*must_exist=*/false);
 }
diff --git a/libartbase/base/file_utils.h b/libartbase/base/file_utils.h
index f539f5f..89ccaf7 100644
--- a/libartbase/base/file_utils.h
+++ b/libartbase/base/file_utils.h
@@ -71,6 +71,11 @@
 // Find $ANDROID_DATA, /data, or return an empty string.
 std::string GetAndroidDataSafe(/*out*/ std::string* error_msg);
 
+// Find $ANDROID_EXPAND, /mnt/expand, or abort.
+std::string GetAndroidExpand();
+// Find $ANDROID_EXPAND, /mnt/expand, or return an empty string.
+std::string GetAndroidExpandSafe(/*out*/ std::string* error_msg);
+
 // Find $ART_APEX_DATA, /data/misc/apexdata/com.android.art, or abort.
 std::string GetArtApexData();
 
diff --git a/libarttools/Android.bp b/libarttools/Android.bp
index 13fa205..db0e707 100644
--- a/libarttools/Android.bp
+++ b/libarttools/Android.bp
@@ -43,6 +43,9 @@
     shared_libs: [
         "libbase",
     ],
+    static_libs: [
+        "libc++fs",
+    ],
     export_shared_lib_headers: ["libbase"],
 }
 
diff --git a/libarttools/tools/tools.cc b/libarttools/tools/tools.cc
index a3a91e8..6c87969 100644
--- a/libarttools/tools/tools.cc
+++ b/libarttools/tools/tools.cc
@@ -16,12 +16,129 @@
 
 #include "tools.h"
 
+#include <errno.h>
+#include <fnmatch.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <functional>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <vector>
+
+#include "android-base/logging.h"
+#include "fmt/format.h"
+
 namespace art {
 namespace tools {
 
-std::string getMsg() {
-    return "hello world!";
+namespace {
+
+using ::std::placeholders::_1;
+
+using ::fmt::literals::operator""_format;  // NOLINT
+
+// Returns true if `path_prefix` matches `pattern`, or if it could be the leading part of a longer
+// path that matches `pattern` (i.e., `path_prefix` names a directory that may contain a file
+// whose path matches `pattern`).
+bool PartialMatch(const std::filesystem::path& pattern, const std::filesystem::path& path_prefix) {
+  std::filesystem::path::const_iterator pattern_elem = pattern.begin();
+  std::filesystem::path::const_iterator prefix_elem = path_prefix.begin();
+  // Compare element by element; the first decisive condition below ends the loop.
+  for (;; ++pattern_elem, ++prefix_elem) {
+    if (prefix_elem == path_prefix.end()) {
+      return true;  // The whole prefix was consumed without a mismatch.
+    }
+    if (pattern_elem == pattern.end()) {
+      return false;  // The prefix is deeper than anything the pattern can match.
+    }
+    if (*pattern_elem == "**") {
+      return true;  // "**" can absorb whatever remains of the prefix.
+    }
+    if (fnmatch(pattern_elem->c_str(), prefix_elem->c_str(), /*flags=*/0) != 0) {
+      return false;
+    }
+  }
 }
 
+bool FullMatchRecursive(const std::filesystem::path& pattern,
+                        std::filesystem::path::const_iterator pattern_it,
+                        const std::filesystem::path& path,
+                        std::filesystem::path::const_iterator path_it,
+                        bool double_asterisk_visited = false) {
+  if (pattern_it == pattern.end() && path_it == path.end()) {
+    return true;  // Pattern and path ran out together: a complete match.
+  }
+  if (pattern_it == pattern.end()) {
+    return false;  // Path elements remain, but no pattern is left to match them.
+  }
+  if (*pattern_it == "**") {  // Let "**" match nothing, or absorb one path element and retry.
+    DCHECK(!double_asterisk_visited);  // NOTE(review): only trips on two adjacent "**" elements.
+    std::filesystem::path::const_iterator next_pattern_it = pattern_it;
+    return FullMatchRecursive(
+               pattern, ++next_pattern_it, path, path_it, /*double_asterisk_visited=*/true) ||
+           (path_it != path.end() && FullMatchRecursive(pattern, pattern_it, path, ++path_it));
+  }
+  if (path_it == path.end()) {
+    return false;  // The path ended, but a non-"**" pattern element still needs a match.
+  }
+  if (fnmatch(pattern_it->c_str(), path_it->c_str(), /*flags=*/0) != 0) {
+    return false;
+  }
+  return FullMatchRecursive(pattern, ++pattern_it, path, ++path_it);
 }
+
+// Returns true if the whole of `path` (not merely a prefix of it) matches `pattern`.
+bool FullMatch(const std::filesystem::path& pattern, const std::filesystem::path& path) {
+  return FullMatchRecursive(pattern, pattern.begin(), path, path.begin());
 }
+
+// Walks the tree under `root_dir` and appends to `results` every regular file whose path fully
+// matches at least one of `patterns`. Subtrees that cannot contain a match are not descended into.
+void MatchGlobRecursive(const std::vector<std::filesystem::path>& patterns,
+                        const std::filesystem::path& root_dir,
+                        /*out*/ std::vector<std::string>* results) {
+  std::error_code ec;
+  for (auto it = std::filesystem::recursive_directory_iterator(
+           root_dir, std::filesystem::directory_options::skip_permission_denied, ec);
+       !ec && it != std::filesystem::end(it);
+       it.increment(ec)) {  // Unlike `it++`, reports errors via `ec` instead of throwing.
+    const std::filesystem::directory_entry& entry = *it;
+    if (std::none_of(patterns.begin(), patterns.end(), std::bind(PartialMatch, _1, entry.path()))) {
+      // Avoid unnecessary I/O and SELinux denials.
+      it.disable_recursion_pending();
+      continue;
+    }
+    std::error_code ec2;
+    if (entry.is_regular_file(ec2) &&
+        std::any_of(patterns.begin(), patterns.end(), std::bind(FullMatch, _1, entry.path()))) {
+      results->push_back(entry.path());
+    }
+    // It's expected that we don't have permission to stat some dirs/files, and we don't care
+    // about them.
+    if (ec2 && ec2.value() != EACCES) {
+      LOG(ERROR) << "Unable to lstat '{}': {}"_format(entry.path().string(), ec2.message());
+    }
+  }
+  if (ec) {
+    LOG(ERROR) << "Unable to walk through '{}': {}"_format(root_dir.string(), ec.message());
+  }
+}
+
+}  // namespace
+
+std::vector<std::string> Glob(const std::vector<std::string>& patterns, std::string_view root_dir) {
+  // Parse each pattern into a path once, up front, so the matchers can iterate over its elements
+  // without re-parsing for every directory entry.
+  const std::vector<std::filesystem::path> pattern_paths(patterns.begin(), patterns.end());
+  const std::filesystem::path walk_root(root_dir);
+
+  std::vector<std::string> matched_files;
+  MatchGlobRecursive(pattern_paths, walk_root, &matched_files);
+  return matched_files;
+}
+
+}  // namespace tools
+}  // namespace art
diff --git a/libarttools/tools/tools.h b/libarttools/tools/tools.h
index 8231f5f..c2bcee7 100644
--- a/libarttools/tools/tools.h
+++ b/libarttools/tools/tools.h
@@ -18,11 +18,24 @@
 #define ART_LIBARTTOOLS_TOOLS_TOOLS_H_
 
 #include <string>
+#include <string_view>
+#include <vector>
 
 namespace art {
 namespace tools {
 
-std::string getMsg();
+// Searches in a filesystem, starting from `root_dir`. Returns all regular files (i.e., excluding
+// directories, symlinks, etc.) that match at least one pattern in `patterns`. Each pattern is an
+// absolute path that contains zero or more wildcards. The scan does not follow symlinks to
+// directories. Errors hit during the scan are logged, not returned; result order is unspecified.
+//
+// Supported wildcards are:
+// - Those documented in glob(7)
+// - '**': Matches zero or more path elements. This is only recognised by itself as a path segment.
+//
+// For simplicity and efficiency, at most one '**' is allowed.
+std::vector<std::string> Glob(const std::vector<std::string>& patterns,
+                              std::string_view root_dir = "/");
 
 }  // namespace tools
 }  // namespace art
diff --git a/libarttools/tools/tools_test.cc b/libarttools/tools/tools_test.cc
index 6eaa8f6..2f61181 100644
--- a/libarttools/tools/tools_test.cc
+++ b/libarttools/tools/tools_test.cc
@@ -15,14 +15,101 @@
  */
 
 #include "tools.h"
+
+#include <algorithm>
+#include <filesystem>
+#include <iterator>
+
+#include "android-base/file.h"
+#include "base/common_art_test.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace art {
+namespace tools {
+namespace {
 
-class ArtToolsTest : public testing::Test {};
+using ::android::base::WriteStringToFile;
+using ::testing::UnorderedElementsAre;
 
-TEST_F(ArtToolsTest, Hello) {
-  EXPECT_EQ("hello world!", art::tools::getMsg());
+void CreateFile(const std::string& filename) {
+  // Create any missing ancestor directories, then write an empty file at `filename`.
+  std::filesystem::create_directories(std::filesystem::path(filename).parent_path());
+  ASSERT_TRUE(WriteStringToFile(/*content=*/"", filename));
 }
 
+class ArtToolsTest : public CommonArtTest {  // Fixture that gives each test a scratch directory.
+ protected:
+  void SetUp() override {
+    CommonArtTest::SetUp();
+    scratch_dir_ = std::make_unique<ScratchDir>();
+    scratch_path_ = scratch_dir_->GetPath();
+    // Remove the trailing '/'.
+    scratch_path_.resize(scratch_path_.length() - 1);
+  }
+
+  void TearDown() override {
+    scratch_dir_.reset();  // Release the scratch dir before the base fixture tears down.
+    CommonArtTest::TearDown();
+  }
+
+  std::unique_ptr<ScratchDir> scratch_dir_;
+  std::string scratch_path_;  // Path of `scratch_dir_`, without the trailing '/'.
+};
+
+TEST_F(ArtToolsTest, Glob) {
+  CreateFile(scratch_path_ + "/abc/def/000.txt");
+  CreateFile(scratch_path_ + "/abc/def/ghi/123.txt");
+  CreateFile(scratch_path_ + "/abc/def/ghi/456.txt");
+  CreateFile(scratch_path_ + "/abc/def/ghi/456.pdf");
+  CreateFile(scratch_path_ + "/abc/def/ghi/jkl/456.txt");
+  CreateFile(scratch_path_ + "/789.txt");
+  CreateFile(scratch_path_ + "/abc/789.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/789.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/789.txt");
+  CreateFile(scratch_path_ + "/abc/mno/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/mno/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/mno/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/mno/ccc/123.txt");
+  CreateFile(scratch_path_ + "/pqr/123.txt");
+  CreateFile(scratch_path_ + "/abc/pqr/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/pqr/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/pqr/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/pqr/ccc/123.txt");
+  CreateFile(scratch_path_ + "/abc/aaa/bbb/pqr/ccc/ddd/123.txt");
+
+  // A symlink to its own parent directory: following it would recurse forever, so Glob must not.
+  std::filesystem::create_directory_symlink(scratch_path_ + "/abc/aaa/bbb/pqr",
+                                            scratch_path_ + "/abc/aaa/bbb/pqr/lnk");
+
+  // A directory whose name matches "*.txt". Only regular files belong in the results.
+  std::filesystem::create_directory(scratch_path_ + "/abc/def/ghi/000.txt");
+
+  std::vector<std::string> patterns = {
+      scratch_path_ + "/abc/def/000.txt",
+      scratch_path_ + "/abc/def/ghi/*.txt",
+      scratch_path_ + "/abc/**/789.txt",  // "**" also matches zero elements: /abc/789.txt.
+      scratch_path_ + "/abc/**/mno/*.txt",
+      scratch_path_ + "/abc/**/pqr/**",
+  };
+
+  EXPECT_THAT(Glob(patterns, scratch_path_),
+              UnorderedElementsAre(scratch_path_ + "/abc/def/000.txt",
+                                   scratch_path_ + "/abc/def/ghi/123.txt",
+                                   scratch_path_ + "/abc/def/ghi/456.txt",
+                                   scratch_path_ + "/abc/789.txt",
+                                   scratch_path_ + "/abc/aaa/789.txt",
+                                   scratch_path_ + "/abc/aaa/bbb/789.txt",
+                                   scratch_path_ + "/abc/mno/123.txt",
+                                   scratch_path_ + "/abc/aaa/mno/123.txt",
+                                   scratch_path_ + "/abc/aaa/bbb/mno/123.txt",
+                                   scratch_path_ + "/abc/pqr/123.txt",
+                                   scratch_path_ + "/abc/aaa/pqr/123.txt",
+                                   scratch_path_ + "/abc/aaa/bbb/pqr/123.txt",
+                                   scratch_path_ + "/abc/aaa/bbb/pqr/ccc/123.txt",
+                                   scratch_path_ + "/abc/aaa/bbb/pqr/ccc/ddd/123.txt"));
+}
+
+}  // namespace
+}  // namespace tools
 }  // namespace art