Add logic to flatten profile info

The flattening operation will extract all methods and classes from a
set of dex files in a list.

This is a pre-step for adding a boot image profile generation option to
profman.

Test: gtest
Bug: 152574358
Merged-In: I35249d719bafc4550016c48a53503e86258874a1
Change-Id: I35249d719bafc4550016c48a53503e86258874a1
diff --git a/libprofile/profile/profile_compilation_info.cc b/libprofile/profile/profile_compilation_info.cc
index 57d51dc..010949a 100644
--- a/libprofile/profile/profile_compilation_info.cc
+++ b/libprofile/profile/profile_compilation_info.cc
@@ -75,7 +75,7 @@
   ProfileCompilationInfo::ProfileSampleAnnotation::kNone =
       ProfileCompilationInfo::ProfileSampleAnnotation("");
 
-static constexpr char kSampleMetdataSeparator = ':';
+static constexpr char kSampleMetadataSeparator = ':';
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -164,7 +164,7 @@
   std::string base_key = GetProfileDexFileBaseKey(dex_location);
   return annotation == ProfileSampleAnnotation::kNone
       ? base_key
-      : base_key + kSampleMetdataSeparator + annotation.GetOriginPackageName();;
+      : base_key + kSampleMetadataSeparator + annotation.GetOriginPackageName();;
 }
 
 // Transform the actual dex location into a base profile key (represented as relative paths).
@@ -184,19 +184,27 @@
 
 std::string ProfileCompilationInfo::GetBaseKeyFromAugmentedKey(
     const std::string& profile_key) {
-  size_t pos = profile_key.rfind(kSampleMetdataSeparator);
+  size_t pos = profile_key.rfind(kSampleMetadataSeparator);
   return (pos == std::string::npos) ? profile_key : profile_key.substr(0, pos);
 }
 
 std::string ProfileCompilationInfo::MigrateAnnotationInfo(
     const std::string& base_key,
     const std::string& augmented_key) {
-  size_t pos = augmented_key.rfind(kSampleMetdataSeparator);
+  size_t pos = augmented_key.rfind(kSampleMetadataSeparator);
   return (pos == std::string::npos)
       ? base_key
       : base_key + augmented_key.substr(pos);
 }
 
+ProfileCompilationInfo::ProfileSampleAnnotation ProfileCompilationInfo::GetAnnotationFromKey(
+     const std::string& augmented_key) {
+  size_t pos = augmented_key.rfind(kSampleMetadataSeparator);  // Last separator, if any.
+  return (pos == std::string::npos)
+      ? ProfileSampleAnnotation::kNone  // No separator: the key carries no annotation.
+      : ProfileSampleAnnotation(augmented_key.substr(pos + 1));  // Text after the separator.
+}
+
 bool ProfileCompilationInfo::AddMethods(const std::vector<ProfileMethodInfo>& methods,
                                         MethodHotness::Flag flags,
                                         const ProfileSampleAnnotation& annotation) {
@@ -690,6 +698,19 @@
   return nullptr;
 }
 
+void ProfileCompilationInfo::FindAllDexData(
+    const DexFile* dex_file,
+    /*out*/ std::vector<const ProfileCompilationInfo::DexFileData*>* result) const {
+  std::string profile_key = GetProfileDexFileBaseKey(dex_file->GetLocation());
+  for (const DexFileData* dex_data : info_) {  // Match on base key: all annotations qualify.
+    if (profile_key == GetBaseKeyFromAugmentedKey(dex_data->profile_key)) {
+      if (ChecksumMatch(dex_data->checksum, dex_file->GetLocationChecksum())) {
+        result->push_back(dex_data);  // Appended; `result` is never cleared here.
+      }
+    }
+  }
+}
+
 bool ProfileCompilationInfo::AddMethod(const ProfileMethodInfo& pmi,
                                        MethodHotness::Flag flags,
                                        const ProfileSampleAnnotation& annotation) {
@@ -2263,4 +2284,76 @@
     ? sizeof(ProfileIndexType)
     : sizeof(ProfileIndexTypeRegular);
 }
+
+FlattenProfileData::FlattenProfileData() :
+    max_aggregation_for_methods_(0),  // Both aggregation counters start at zero.
+    max_aggregation_for_classes_(0) {
+}
+
+FlattenProfileData::ItemMetadata::ItemMetadata() :
+    flags_(0) {  // No flags set; annotations_ is not listed, so it is default-initialized.
+}
+
+FlattenProfileData::ItemMetadata::ItemMetadata(const ItemMetadata& other) :
+    flags_(other.flags_),  // Member-wise copy of both fields.
+    annotations_(other.annotations_) {
+}
+
+std::unique_ptr<FlattenProfileData> ProfileCompilationInfo::ExtractProfileData(
+    const std::vector<std::unique_ptr<const DexFile>>& dex_files) const {
+
+  std::unique_ptr<FlattenProfileData> result(new FlattenProfileData());
+
+  auto createMetadataFn = []() { return FlattenProfileData::ItemMetadata(); };
+
+  // Iterate through all the dex files, find the methods/classes associated with each of them,
+  // and add them to the flattened result.
+  for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+    // Find all the dex data for the given dex file.
+    // We may have multiple dex data if the methods or classes were added using
+    // different annotations.
+    std::vector<const DexFileData*> all_dex_data;
+    FindAllDexData(dex_file.get(), &all_dex_data);
+    for (const DexFileData* dex_data : all_dex_data) {
+      // Extract the annotation from the key as we want to store it in the flattened result.
+      ProfileSampleAnnotation annotation = GetAnnotationFromKey(dex_data->profile_key);
+
+      // Check which methods from the current dex files are in the profile.
+      for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
+        MethodHotness hotness = dex_data->GetHotnessInfo(method_idx);
+        if (!hotness.IsInProfile()) {
+          // Not in the profile, continue.
+          continue;
+        }
+        // The method is in the profile: create a metadata item for it and add it to the result.
+        MethodReference ref(dex_file.get(), method_idx);
+        FlattenProfileData::ItemMetadata& metadata =
+            result->method_metadata_.GetOrCreate(ref, createMetadataFn);
+        metadata.flags_ |= hotness.flags_;
+        metadata.annotations_.insert(annotation);
+        // Update the max aggregation counter for methods.
+        // This is essentially a cache, to avoid traversing all the methods just to find out
+        // this value.
+        result->max_aggregation_for_methods_ = std::max(
+            result->max_aggregation_for_methods_,
+            static_cast<uint32_t>(metadata.annotations_.size()));
+      }
+
+      // Check which classes from the current dex files are in the profile.
+      for (const dex::TypeIndex& type_index : dex_data->class_set) {
+        TypeReference ref(dex_file.get(), type_index);
+        FlattenProfileData::ItemMetadata& metadata =
+            result->class_metadata_.GetOrCreate(ref, createMetadataFn);
+        metadata.annotations_.insert(annotation);
+        // Update the max aggregation counter for classes.
+        result->max_aggregation_for_classes_ = std::max(
+            result->max_aggregation_for_classes_,
+            static_cast<uint32_t>(metadata.annotations_.size()));
+      }
+    }
+  }
+
+  return result;
+}
+
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info.h b/libprofile/profile/profile_compilation_info.h
index fa70e8b..f0dc9b4 100644
--- a/libprofile/profile/profile_compilation_info.h
+++ b/libprofile/profile/profile_compilation_info.h
@@ -17,6 +17,7 @@
 #ifndef ART_LIBPROFILE_PROFILE_PROFILE_COMPILATION_INFO_H_
 #define ART_LIBPROFILE_PROFILE_PROFILE_COMPILATION_INFO_H_
 
+#include <list>
 #include <set>
 #include <vector>
 
@@ -61,6 +62,8 @@
   std::vector<ProfileInlineCache> inline_caches;
 };
 
+class FlattenProfileData;
+
 /**
  * Profile information in a format suitable to be queried by the compiler and
  * performing profile guided compilation.
@@ -306,6 +309,10 @@
 
     bool operator==(const ProfileSampleAnnotation& other) const;
 
+    bool operator<(const ProfileSampleAnnotation& other) const {  // Enables use as a std::set key.
+      return origin_package_name_ < other.origin_package_name_;  // Orders by package name only.
+    }
+
     // A convenient empty annotation object that can be used to denote that no annotation should
     // be associated with the profile samples.
     static const ProfileSampleAnnotation kNone;
@@ -498,6 +505,10 @@
   // Returns a base key without the annotation information.
   static std::string GetBaseKeyFromAugmentedKey(const std::string& profile_key);
 
+  // Returns the annotation encoded in an augmented key.
+  // If the key is a base key it returns ProfileSampleAnnotation::kNone.
+  static ProfileSampleAnnotation GetAnnotationFromKey(const std::string& augmented_key);
+
   // Generate a test profile which will contain a percentage of the total maximum
   // number of methods and classes (method_ratio and class_ratio).
   static bool GenerateTestProfile(int fd,
@@ -561,6 +572,15 @@
   // Return the version of this profile.
   const uint8_t* GetVersion() const;
 
+  // Extracts the data that the profile has on the given dex files:
+  //  - for each method and class, a list of the corresponding annotations and flags
+  //  - the maximum number of aggregations for methods and classes across dex files with different
+  //    annotations (essentially this sums up how many different packages used the corresponding
+  //    method). This information is reconstructible from the other two pieces of info, but it's
+  //    convenient to have it precomputed.
+  std::unique_ptr<FlattenProfileData> ExtractProfileData(
+      const std::vector<std::unique_ptr<const DexFile>>& dex_files) const;
+
  private:
   enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
@@ -695,6 +715,11 @@
       const DexFile* dex_file,
       const ProfileSampleAnnotation& annotation) const;
 
+  // Same as FindDexDataUsingAnnotations but extracts the data for all annotations.
+  void FindAllDexData(
+      const DexFile* dex_file,
+      /*out*/ std::vector<const ProfileCompilationInfo::DexFileData*>* result) const;
+
   // Inflate the input buffer (in_buffer) of size in_size. It returns a buffer of
   // compressed data for the input buffer of "compressed_data_size" size.
   std::unique_ptr<uint8_t[]> DeflateBuffer(const uint8_t* in_buffer,
@@ -949,8 +974,78 @@
   uint8_t version_[kProfileVersionSize];
 };
 
+/**
+ * Flattened profile data that lists all methods and type references together
+ * with their metadata (such as flags or annotation list).
+ */
+class FlattenProfileData {
+ public:
+  class ItemMetadata {
+   public:
+    ItemMetadata();
+    ItemMetadata(const ItemMetadata& other);
+
+    uint16_t GetFlags() const {
+      return flags_;
+    }
+
+    const std::set<ProfileCompilationInfo::ProfileSampleAnnotation>& GetAnnotations() const {
+      return annotations_;
+    }
+
+    void AddFlag(ProfileCompilationInfo::MethodHotness::Flag flag) {
+      flags_ |= flag;
+    }
+
+    bool HasFlagSet(ProfileCompilationInfo::MethodHotness::Flag flag) const {
+      return (flags_ & flag) != 0;
+    }
+
+   private:
+    // Will be 0 for classes and MethodHotness flags for methods.
+    uint16_t flags_;
+    std::set<ProfileCompilationInfo::ProfileSampleAnnotation> annotations_;
+
+    friend class ProfileCompilationInfo;
+  };
+
+  FlattenProfileData();
+
+  const SafeMap<MethodReference, ItemMetadata>& GetMethodData() const {
+    return method_metadata_;
+  }
+
+  const SafeMap<TypeReference, ItemMetadata>& GetClassData() const {
+    return class_metadata_;
+  }
+
+  uint32_t GetMaxAggregationForMethods() const {
+    return max_aggregation_for_methods_;
+  }
+
+  uint32_t GetMaxAggregationForClasses() const {
+    return max_aggregation_for_classes_;
+  }
+
+ private:
+  // Method data.
+  SafeMap<MethodReference, ItemMetadata> method_metadata_;
+  // Class data.
+  SafeMap<TypeReference, ItemMetadata> class_metadata_;
+  // Maximum aggregation counter for all methods.
+  // This is essentially a cache equal to the max size of any method's annotation set.
+  // It avoids the traversal of all the methods which can be quite expensive.
+  uint32_t max_aggregation_for_methods_;
+  // Maximum aggregation counter for all classes.
+  // Similar to max_aggregation_for_methods_.
+  uint32_t max_aggregation_for_classes_;
+
+  friend class ProfileCompilationInfo;
+};
+
 std::ostream& operator<<(std::ostream& stream,
                          const ProfileCompilationInfo::DexReference& dex_ref);
+
 }  // namespace art
 
 #endif  // ART_LIBPROFILE_PROFILE_PROFILE_COMPILATION_INFO_H_
diff --git a/libprofile/profile/profile_compilation_info_test.cc b/libprofile/profile/profile_compilation_info_test.cc
index 8b1cedf..81d0cc9 100644
--- a/libprofile/profile/profile_compilation_info_test.cc
+++ b/libprofile/profile/profile_compilation_info_test.cc
@@ -36,6 +36,7 @@
 using ProfileSampleAnnotation = ProfileCompilationInfo::ProfileSampleAnnotation;
 using ProfileIndexType = ProfileCompilationInfo::ProfileIndexType;
 using ProfileIndexTypeRegular = ProfileCompilationInfo::ProfileIndexTypeRegular;
+using ItemMetadata = FlattenProfileData::ItemMetadata;
 
 static constexpr size_t kMaxMethodIds = 65535;
 static uint32_t kMaxHotnessFlagBootIndex =
@@ -1707,4 +1708,66 @@
   ASSERT_FALSE(info.IsForBootImage());
 }
 
+// Verify that flattened profile data can be extracted for a given set of dex files.
+TEST_F(ProfileCompilationInfoTest, ExtractProfileData) {
+  // Setup test data
+  ProfileCompilationInfo info;
+
+  ProfileSampleAnnotation psa1("test1");
+  ProfileSampleAnnotation psa2("test2");
+
+  for (uint16_t i = 0; i < 10; i++) {
+    // Add dex1 data with different annotations so that we can check the annotation count.
+    ASSERT_TRUE(AddMethod(&info, dex1, /* method_idx= */ i, Hotness::kFlagHot, psa1));
+    ASSERT_TRUE(AddClass(&info, dex1, dex::TypeIndex(i), psa1));
+    ASSERT_TRUE(AddMethod(&info, dex1, /* method_idx= */ i, Hotness::kFlagStartup, psa2));
+    ASSERT_TRUE(AddClass(&info, dex1, dex::TypeIndex(i), psa2));
+    ASSERT_TRUE(AddMethod(&info, dex2, /* method_idx= */ i, Hotness::kFlagHot, psa2));
+    // dex3 will not be used in the data extraction
+    ASSERT_TRUE(AddMethod(&info, dex3, /* method_idx= */ i, Hotness::kFlagHot, psa2));
+  }
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files.push_back(std::unique_ptr<const DexFile>(dex1));
+  dex_files.push_back(std::unique_ptr<const DexFile>(dex2));
+
+  // Run the test: extract the data for dex1 and dex2
+  std::unique_ptr<FlattenProfileData> flattenProfileData = info.ExtractProfileData(dex_files);
+
+  // Check the results
+  ASSERT_TRUE(flattenProfileData != nullptr);
+  ASSERT_EQ(flattenProfileData->GetMaxAggregationForMethods(), 2u);
+  ASSERT_EQ(flattenProfileData->GetMaxAggregationForClasses(), 2u);
+
+  const SafeMap<MethodReference, ItemMetadata>& methods = flattenProfileData->GetMethodData();
+  const SafeMap<TypeReference, ItemMetadata>& classes = flattenProfileData->GetClassData();
+  ASSERT_EQ(methods.size(), 20u);  // 10 methods in dex1, 10 in dex2
+  ASSERT_EQ(classes.size(), 10u);  // 10 classes in dex1
+
+  std::set<ProfileSampleAnnotation> expectedAnnotations1({psa1, psa2});
+  std::set<ProfileSampleAnnotation> expectedAnnotations2({psa2});
+  for (uint16_t i = 0; i < 10; i++) {
+    // Check dex1 methods.
+    auto mIt1 = methods.find(MethodReference(dex1, i));
+    ASSERT_TRUE(mIt1 != methods.end());
+    ASSERT_EQ(mIt1->second.GetFlags(), Hotness::kFlagHot | Hotness::kFlagStartup);
+    ASSERT_EQ(mIt1->second.GetAnnotations(), expectedAnnotations1);
+    // Check dex1 classes
+    auto cIt1 = classes.find(TypeReference(dex1, dex::TypeIndex(i)));
+    ASSERT_TRUE(cIt1 != classes.end());
+    ASSERT_EQ(cIt1->second.GetFlags(), 0);
+    ASSERT_EQ(cIt1->second.GetAnnotations(), expectedAnnotations1);
+    // Check dex2 methods.
+    auto mIt2 = methods.find(MethodReference(dex2, i));
+    ASSERT_TRUE(mIt2 != methods.end());
+    ASSERT_EQ(mIt2->second.GetFlags(), Hotness::kFlagHot);
+    ASSERT_EQ(mIt2->second.GetAnnotations(), expectedAnnotations2);
+  }
+
+  // Release the ownership, as the dex files are owned by the test class.
+  for (std::unique_ptr<const DexFile>& dex : dex_files) {
+    UNUSED(dex.release());
+  }
+}
+
 }  // namespace art