Enable profile data filtering in profman

Update profile merging to accept a set of apks (passed with --apk) which
will dictate what data should be processed.

When profman is invoked with a list of --apk files, only profile data
belonging to those apks will be in the output reference profile.

If no --dex-location is specified then the locations are inferred by
reading the /proc/self/fd/<apk_fd> links.

Test: profile_assistant_test
Bug: 30934496
Change-Id: I44698c6db545ecf91454db1387c3d0e47fe5b9b3
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index ff02b5d..a00b1fa 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -31,12 +31,13 @@
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
-        const ScopedFlock& reference_profile_file) {
+        const ScopedFlock& reference_profile_file,
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
   DCHECK(!profile_files.empty());
 
   ProfileCompilationInfo info;
   // Load the reference profile.
-  if (!info.Load(reference_profile_file->Fd())) {
+  if (!info.Load(reference_profile_file->Fd(), /*merge_classes*/ true, filter_fn)) {
     LOG(WARNING) << "Could not load reference profile file";
     return kErrorBadProfiles;
   }
@@ -48,7 +49,7 @@
   // Merge all current profiles.
   for (size_t i = 0; i < profile_files.size(); i++) {
     ProfileCompilationInfo cur_info;
-    if (!cur_info.Load(profile_files[i]->Fd())) {
+    if (!cur_info.Load(profile_files[i]->Fd(), /*merge_classes*/ true, filter_fn)) {
       LOG(WARNING) << "Could not load profile file at index " << i;
       return kErrorBadProfiles;
     }
@@ -122,7 +123,8 @@
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<int>& profile_files_fd,
-        int reference_profile_file_fd) {
+        int reference_profile_file_fd,
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
   DCHECK_GE(reference_profile_file_fd, 0);
 
   std::string error;
@@ -143,12 +145,15 @@
     return kErrorCannotLock;
   }
 
-  return ProcessProfilesInternal(profile_files.Get(), reference_profile_file);
+  return ProcessProfilesInternal(profile_files.Get(),
+                                 reference_profile_file,
+                                 filter_fn);
 }
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<std::string>& profile_files,
-        const std::string& reference_profile_file) {
+        const std::string& reference_profile_file,
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
   std::string error;
 
   ScopedFlockList profile_files_list(profile_files.size());
@@ -164,7 +169,9 @@
     return kErrorCannotLock;
   }
 
-  return ProcessProfilesInternal(profile_files_list.Get(), locked_reference_profile_file);
+  return ProcessProfilesInternal(profile_files_list.Get(),
+                                 locked_reference_profile_file,
+                                 filter_fn);
 }
 
 }  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
index be703ab..ee55584 100644
--- a/profman/profile_assistant.h
+++ b/profman/profile_assistant.h
@@ -53,16 +53,21 @@
   //
   static ProcessingResult ProcessProfiles(
       const std::vector<std::string>& profile_files,
-      const std::string& reference_profile_file);
+      const std::string& reference_profile_file,
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
 
   static ProcessingResult ProcessProfiles(
       const std::vector<int>& profile_files_fd_,
-      int reference_profile_file_fd);
+      int reference_profile_file_fd,
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
 
  private:
   static ProcessingResult ProcessProfilesInternal(
       const std::vector<ScopedFlock>& profile_files,
-      const ScopedFlock& reference_profile_file);
+      const ScopedFlock& reference_profile_file,
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn);
 
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index c75f3e9..79310ac 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include "android-base/strings.h"
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
@@ -51,6 +52,28 @@
     uint32_t dex_location_checksum1 = checksum;
     std::string dex_location2 = "location2" + id;
     uint32_t dex_location_checksum2 = 10 * checksum;
+    SetupProfile(dex_location1,
+                 dex_location_checksum1,
+                 dex_location2,
+                 dex_location_checksum2,
+                 number_of_methods,
+                 number_of_classes,
+                 profile,
+                 info,
+                 start_method_index,
+                 reverse_dex_write_order);
+  }
+
+  void SetupProfile(const std::string& dex_location1,
+                    uint32_t dex_location_checksum1,
+                    const std::string& dex_location2,
+                    uint32_t dex_location_checksum2,
+                    uint16_t number_of_methods,
+                    uint16_t number_of_classes,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0,
+                    bool reverse_dex_write_order = false) {
     for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
       // reverse_dex_write_order controls the order in which the dex files will be added to
       // the profile and thus written to disk.
@@ -1128,4 +1151,89 @@
   }
 }
 
+// Checks that merging with --apk-fd drops profile data not belonging to the apk's dex files.
+TEST_F(ProfileAssistantTest, MergeProfilesWithFilter) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  // Use a real dex file to generate profile test data.
+  // The file will be used during merging to filter unwanted data.
+  std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+  const DexFile& d1 = *dex_files[0];
+  const DexFile& d2 = *dex_files[1];
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile(d1.GetLocation(), d1.GetLocationChecksum(), "p1", 1,
+      kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile(d2.GetLocation(), d2.GetLocationChecksum(), "p2", 2,
+      kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
+
+  // The reference profile info will contain the methods with indices 50-149.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info;
+  SetupProfile(d1.GetLocation(), d1.GetLocationChecksum(), "p1", 1,
+      kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
+      &reference_info, kNumberOfMethodsToEnableCompilation / 2);
+
+  // Run profman and pass the dex file with --apk-fd.
+  android::base::unique_fd apk_fd(
+      open(GetTestDexFileName("ProfileTestMultiDex").c_str(), O_RDONLY));
+  ASSERT_GE(apk_fd.get(), 0);
+
+  std::string profman_cmd = GetProfmanCmd();
+  std::vector<std::string> argv_str;
+  argv_str.push_back(profman_cmd);
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile1.GetFd()));
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile2.GetFd()));
+  argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile.GetFd()));
+  argv_str.push_back("--apk-fd=" + std::to_string(apk_fd.get()));
+  std::string error;
+
+  EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0) << error;
+
+  // Verify that we can load the result.
+
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  // Rewind the files so they can be loaded again below.
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+
+  // Verify that the result filtered out data not belonging to the dex file.
+  // This is equivalent to checking that the result is equal to the merging of
+  // all profiles while filtering out data not belonging to the dex file.
+
+  ProfileCompilationInfo::ProfileLoadFilterFn filter_fn =
+      [&d1, &d2](const std::string& dex_location, uint32_t checksum) -> bool {
+          return (dex_location == ProfileCompilationInfo::GetProfileDexFileKey(d1.GetLocation())
+              && checksum == d1.GetLocationChecksum())
+              || (dex_location == ProfileCompilationInfo::GetProfileDexFileKey(d2.GetLocation())
+              && checksum == d2.GetLocationChecksum());
+        };
+
+  ProfileCompilationInfo info1_filter;
+  ProfileCompilationInfo info2_filter;
+  ProfileCompilationInfo expected;
+
+  ASSERT_TRUE(info1_filter.Load(profile1.GetFd(), /*merge_classes*/ true, filter_fn));
+  ASSERT_TRUE(info2_filter.Load(profile2.GetFd(), /*merge_classes*/ true, filter_fn));
+  ASSERT_TRUE(expected.Load(reference_profile.GetFd(), /*merge_classes*/ true, filter_fn));
+
+  ASSERT_TRUE(expected.MergeWith(info1_filter));
+  ASSERT_TRUE(expected.MergeWith(info2_filter));
+
+  ASSERT_TRUE(expected.Equals(result));
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index ea6c382..387ce8d 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/file.h>
+#include <sys/param.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -297,6 +298,22 @@
     }
   }
 
+  // Identifies a dex file by (dex location, checksum); ordered by checksum, then location.
+  struct ProfileFilterKey {
+    ProfileFilterKey(const std::string& dex_location, uint32_t checksum)
+        : dex_location_(dex_location), checksum_(checksum) {}
+    const std::string dex_location_;
+    uint32_t checksum_;
+
+    bool operator==(const ProfileFilterKey& rhs) const {
+      return (checksum_ == rhs.checksum_) && (dex_location_ == rhs.dex_location_);
+    }
+    bool operator<(const ProfileFilterKey& rhs) const {
+      const bool same_checksum = (checksum_ == rhs.checksum_);
+      return same_checksum ? (dex_location_ < rhs.dex_location_) : (checksum_ < rhs.checksum_);
+    }
+  };
+
   ProfileAssistant::ProcessingResult ProcessProfiles() {
     // Validate that at least one profile file was passed, as well as a reference profile.
     if (profile_files_.empty() && profile_files_fd_.empty()) {
@@ -310,6 +327,27 @@
       Usage("Options --profile-file-fd and --reference-profile-file-fd "
             "should only be used together");
     }
+
+    // Check if we have any apks which we should use to filter the profile data.
+    std::set<ProfileFilterKey> profile_filter_keys;
+    if (!GetProfileFilterKeyFromApks(&profile_filter_keys)) {
+      return ProfileAssistant::kErrorIO;
+    }
+
+    // Build the profile filter function. If the set of keys is empty it means we
+    // don't have any apks; as such we do not filter anything.
+    const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn =
+        [profile_filter_keys](const std::string& dex_location, uint32_t checksum) {
+            if (profile_filter_keys.empty()) {
+              // No --apk was specified. Accept all dex files.
+              return true;
+            } else {
+              bool res = profile_filter_keys.find(
+                  ProfileFilterKey(dex_location, checksum)) != profile_filter_keys.end();
+              return res;
+            }
+        };
+
     ProfileAssistant::ProcessingResult result;
 
     if (profile_files_.empty()) {
@@ -317,10 +355,13 @@
       // so don't check the usage.
       File file(reference_profile_file_fd_, false);
       result = ProfileAssistant::ProcessProfiles(profile_files_fd_,
-                                                 reference_profile_file_fd_);
+                                                 reference_profile_file_fd_,
+                                                 filter_fn);
       CloseAllFds(profile_files_fd_, "profile_files_fd_");
     } else {
-      result = ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_file_);
+      result = ProfileAssistant::ProcessProfiles(profile_files_,
+                                                 reference_profile_file_,
+                                                 filter_fn);
     }
     return result;
   }
@@ -329,18 +370,48 @@
     return skip_apk_verification_;
   }
 
-  void OpenApkFilesFromLocations(std::vector<std::unique_ptr<const DexFile>>* dex_files) const {
+  // Adds the (profile key, checksum) of every opened dex file to profile_filter_keys.
+  bool GetProfileFilterKeyFromApks(std::set<ProfileFilterKey>* profile_filter_keys) {
+    return OpenApkFilesFromLocations(
+        [profile_filter_keys](std::unique_ptr<const DexFile>&& dex_file) {
+          // Keep the profile key rather than the raw location: it eases filter matching.
+          profile_filter_keys->emplace(ProfileCompilationInfo::GetProfileDexFileKey(
+              dex_file->GetLocation()), dex_file->GetLocationChecksum());
+        });
+  }
+
+  // Collects every dex file handed out by the callback-based overload into dex_files.
+  bool OpenApkFilesFromLocations(std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+    return OpenApkFilesFromLocations([dex_files](std::unique_ptr<const DexFile>&& opened) {
+      dex_files->push_back(std::move(opened));
+    });
+  }
+
+  bool OpenApkFilesFromLocations(
+      std::function<void(std::unique_ptr<const DexFile>&&)> process_fn) {
     bool use_apk_fd_list = !apks_fd_.empty();
     if (use_apk_fd_list) {
       // Get the APKs from the collection of FDs.
-      CHECK_EQ(dex_locations_.size(), apks_fd_.size());
+      if (dex_locations_.empty()) {
+        // Try to compute the dex locations from the file paths of the descriptions.
+        // This will make it easier to invoke profman with --apk-fd and without
+        // being force to pass --dex-location when the location would be the apk path.
+        if (!ComputeDexLocationsFromApkFds()) {
+          return false;
+        }
+      } else {
+        if (dex_locations_.size() != apks_fd_.size()) {
+            Usage("The number of apk-fds must match the number of dex-locations.");
+        }
+      }
     } else if (!apk_files_.empty()) {
-      // Get the APKs from the collection of filenames.
-      CHECK_EQ(dex_locations_.size(), apk_files_.size());
+        if (dex_locations_.size() != apk_files_.size()) {
+            Usage("The number of apk-fds must match the number of dex-locations.");
+        }
     } else {
       // No APKs were specified.
       CHECK(dex_locations_.empty());
-      return;
+      return true;
     }
     static constexpr bool kVerifyChecksum = true;
     for (size_t i = 0; i < dex_locations_.size(); ++i) {
@@ -355,8 +426,8 @@
                                     &error_msg,
                                     &dex_files_for_location)) {
         } else {
-          LOG(WARNING) << "OpenZip failed for '" << dex_locations_[i] << "' " << error_msg;
-          continue;
+          LOG(ERROR) << "OpenZip failed for '" << dex_locations_[i] << "' " << error_msg;
+          return false;
         }
       } else {
         if (dex_file_loader.Open(apk_files_[i].c_str(),
@@ -366,14 +437,36 @@
                                  &error_msg,
                                  &dex_files_for_location)) {
         } else {
-          LOG(WARNING) << "Open failed for '" << dex_locations_[i] << "' " << error_msg;
-          continue;
+          LOG(ERROR) << "Open failed for '" << dex_locations_[i] << "' " << error_msg;
+          return false;
         }
       }
       for (std::unique_ptr<const DexFile>& dex_file : dex_files_for_location) {
-        dex_files->emplace_back(std::move(dex_file));
+        process_fn(std::move(dex_file));
       }
     }
+    return true;
+  }
+
+  // Derives the dex locations from the apk file descriptors.
+  // For every fd in apks_fd_, the method reads the /proc/self/fd/<fd> link to recover
+  // the apk path and appends it to dex_locations_. Returns false if a link can't be read.
+  bool ComputeDexLocationsFromApkFds() {
+    // A char array of PATH_MAX size on the stack would exceed the frame size,
+    // so a vector serves as the buffer for the path.
+    std::vector<char> buf(PATH_MAX, 0);
+    for (const auto& apk_fd : apks_fd_) {
+      const std::string link = "/proc/self/fd/" + std::to_string(apk_fd);
+      // One byte is held back so the terminator written below always fits.
+      const ssize_t length = readlink(link.c_str(), buf.data(), buf.size() - 1);
+      if (length == -1) {
+        PLOG(ERROR) << "Could not open path from fd";
+        return false;
+      }
+      buf[length] = '\0';
+      dex_locations_.push_back(buf.data());
+    }
+    return true;
   }
 
   std::unique_ptr<const ProfileCompilationInfo> LoadProfile(const std::string& filename, int fd) {
@@ -416,8 +509,6 @@
     static const char* kOrdinaryProfile = "=== profile ===";
     static const char* kReferenceProfile = "=== reference profile ===";
 
-    // Open apk/zip files and and read dex files.
-    MemMap::Init();  // for ZipArchive::OpenFromFd
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     OpenApkFilesFromLocations(&dex_files);
     std::string dump;
@@ -553,8 +644,7 @@
         reference_profile_file_.empty() && !FdIsValid(reference_profile_file_fd_)) {
       Usage("No profile files or reference profile specified.");
     }
-    // Open apk/zip files and and read dex files.
-    MemMap::Init();  // for ZipArchive::OpenFromFd
+
     // Open the dex files to get the names for classes.
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     OpenApkFilesFromLocations(&dex_files);
@@ -948,8 +1038,6 @@
       Usage("Profile must be specified with --reference-profile-file or "
             "--reference-profile-file-fd");
     }
-    // for ZipArchive::OpenFromFd
-    MemMap::Init();
     // Open the profile output file if needed.
     int fd = OpenReferenceProfile();
     if (!FdIsValid(fd)) {
@@ -984,8 +1072,6 @@
   }
 
   int CreateBootProfile() {
-    // Initialize memmap since it's required to open dex files.
-    MemMap::Init();
     // Open the profile output file.
     const int reference_fd = OpenReferenceProfile();
     if (!FdIsValid(reference_fd)) {
@@ -1065,8 +1151,6 @@
                                                            test_profile_class_percentage_,
                                                            test_profile_seed_);
     } else {
-      // Initialize MemMap for ZipArchive::OpenFromFd.
-      MemMap::Init();
       // Open the dex files to look up classes and methods.
       std::vector<std::unique_ptr<const DexFile>> dex_files;
       OpenApkFilesFromLocations(&dex_files);
@@ -1089,7 +1173,7 @@
     return copy_and_update_profile_key_;
   }
 
-  bool CopyAndUpdateProfileKey() const {
+  bool CopyAndUpdateProfileKey() {
     // Validate that at least one profile file was passed, as well as a reference profile.
     if (!(profile_files_.size() == 1 ^ profile_files_fd_.size() == 1)) {
       Usage("Only one profile file should be specified.");
@@ -1133,7 +1217,8 @@
   static void CloseAllFds(const std::vector<int>& fds, const char* descriptor) {
     for (size_t i = 0; i < fds.size(); i++) {
       if (close(fds[i]) < 0) {
-        PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+        PLOG(WARNING) << "Failed to close descriptor for "
+            << descriptor << " at index " << i << ": " << fds[i];
       }
     }
   }
@@ -1176,6 +1261,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  // Initialize MemMap for ZipArchive::OpenFromFd.
+  MemMap::Init();
+
   if (profman.ShouldGenerateTestProfile()) {
     return profman.GenerateTestProfile();
   }