Load profiles from zip archives

Extend the profile logic to support loading from zip archives. The
profile will be loaded from the zip entry called 'primary.prof'.

If the zip file does not contain such an entry, an empty profile will be
created. This gives more flexibility in handling archives which may lack a
profile entry.

Test: m test-art-host-gtest
Bug: 30934496
Change-Id: Ief091d995661af8302e086b19bc533187dffd463
diff --git a/runtime/Android.bp b/runtime/Android.bp
index e30a06c..12e8829 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -709,6 +709,7 @@
     ],
     shared_libs: [
         "libbacktrace",
+        "libziparchive",
     ],
     header_libs: [
         "art_cmdlineparser_headers", // For parsed_options_test.
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 74bf237..1cc5aeb 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -47,6 +47,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "utils.h"
+#include "zip_archive.h"
 
 namespace art {
 
@@ -56,6 +57,10 @@
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
 
+// The name of the profile entry in the dex metadata file.
+// DO NOT CHANGE THIS! (it's similar to classes.dex in the apk files).
+const char* ProfileCompilationInfo::kDexMetadataProfileEntry = "primary.prof";
+
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
 // Debug flag to ignore checksums when testing if a method or a class is present in the profile.
@@ -883,25 +888,13 @@
   return false;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
-      int fd,
-      const std::string& source,
-      /*out*/std::string* error) {
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::Fill(
+      ProfileSource& source,
+      const std::string& debug_stage,
+      /*out*/ std::string* error) {
   size_t byte_count = (ptr_end_ - ptr_current_) * sizeof(*ptr_current_);
   uint8_t* buffer = ptr_current_;
-  while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
-    if (bytes_read == 0) {
-      *error += "Profile EOF reached prematurely for " + source;
-      return kProfileLoadBadData;
-    } else if (bytes_read < 0) {
-      *error += "Profile IO error for " + source + strerror(errno);
-      return kProfileLoadIOError;
-    }
-    byte_count -= bytes_read;
-    buffer += bytes_read;
-  }
-  return kProfileLoadSuccess;
+  return source.Read(buffer, byte_count, debug_stage, error);
 }
 
 size_t ProfileCompilationInfo::SafeBuffer::CountUnreadBytes() {
@@ -917,7 +910,7 @@
 }
 
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
-      int fd,
+      ProfileSource& source,
       /*out*/uint8_t* number_of_dex_files,
       /*out*/uint32_t* uncompressed_data_size,
       /*out*/uint32_t* compressed_data_size,
@@ -932,7 +925,7 @@
 
   SafeBuffer safe_buffer(kMagicVersionSize);
 
-  ProfileLoadSatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  ProfileLoadSatus status = safe_buffer.Fill(source, "ReadProfileHeader", error);
   if (status != kProfileLoadSuccess) {
     return status;
   }
@@ -1148,31 +1141,136 @@
   return true;
 }
 
+// Opens fd either as a plain profile file or as a zip archive containing the profile entry.
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::OpenSource(
+    int32_t fd,
+    /*out*/ std::unique_ptr<ProfileSource>* source,
+    /*out*/ std::string* error) {
+  if (IsProfileFile(fd)) {
+    source->reset(ProfileSource::Create(fd));
+    return kProfileLoadSuccess;
+  } else {
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, "profile", error));
+    if (zip_archive.get() == nullptr) {
+      *error = "Could not open the profile zip archive: " + *error;
+      return kProfileLoadBadData;
+    }
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kDexMetadataProfileEntry, error));
+    if (zip_entry == nullptr) {
+      // Allow archives without the profile entry. In this case, create an empty profile.
+      // This gives more flexibility when re-using archives that may miss the entry
+      // (e.g. dex metadata files).
+      LOG(WARNING) << std::string("Could not find entry ") + kDexMetadataProfileEntry +
+            " in the zip archive. Creating an empty profile.";
+      source->reset(ProfileSource::Create(nullptr));
+      return kProfileLoadSuccess;
+    }
+    if (zip_entry->GetUncompressedLength() == 0) {
+      *error = "Empty profile entry in the zip archive.";
+      return kProfileLoadBadData;
+    }
+
+    std::unique_ptr<MemMap> map;
+    if (zip_entry->IsUncompressed()) {
+      // Map uncompressed files within zip as file-backed to avoid a dirty copy.
+      map.reset(zip_entry->MapDirectlyFromFile(kDexMetadataProfileEntry, error));
+      if (map == nullptr) {
+        LOG(WARNING) << "Can't mmap profile directly; "
+                     << "is your ZIP file corrupted? Falling back to extraction.";
+        // Try again with Extraction which still has a chance of recovery.
+      }
+    }
+
+    if (map == nullptr) {
+      // Default path for compressed ZIP entries, and fallback for stored ZIP entries.
+      // TODO(calin) pass along file names to assist with debugging.
+      map.reset(zip_entry->ExtractToMemMap("profile file", kDexMetadataProfileEntry, error));
+    }
+
+    if (map == nullptr) {
+      return kProfileLoadBadData;  // Neither mapping nor extraction produced the profile data.
+    }
+    source->reset(ProfileSource::Create(std::move(map)));
+    return kProfileLoadSuccess;
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ProfileSource::Read(
+    uint8_t* buffer,
+    size_t byte_count,
+    const std::string& debug_stage,
+    std::string* error) {
+  if (IsMemMap()) {
+    // Compare against the bytes left (cannot overflow); a null map is an empty source.
+    if (mem_map_ == nullptr || byte_count > mem_map_->Size() - mem_map_cur_) {
+      *error += "Profile EOF reached prematurely for " + debug_stage;
+      return kProfileLoadBadData;
+    }
+    memcpy(buffer, mem_map_->Begin() + mem_map_cur_, byte_count);
+    mem_map_cur_ += byte_count;
+  } else {
+    while (byte_count > 0) {
+      ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_, buffer, byte_count));
+      if (bytes_read == 0) {
+        *error += "Profile EOF reached prematurely for " + debug_stage;
+        return kProfileLoadBadData;
+      } else if (bytes_read < 0) {
+        *error += "Profile IO error for " + debug_stage + strerror(errno);
+        return kProfileLoadIOError;
+      }
+      byte_count -= bytes_read;
+      buffer += bytes_read;
+    }
+  }
+  return kProfileLoadSuccess;
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasConsumedAllData() const {
+  return IsMemMap()
+      ? (mem_map_ == nullptr || mem_map_cur_ == mem_map_->Size())  // Null map: nothing to read.
+      : (testEOF(fd_) == 0);  // 0 from testEOF means no unread data remains on the fd.
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasEmptyContent() const {
+  if (IsMemMap()) {
+    return mem_map_ == nullptr || mem_map_->Size() == 0;  // A null map counts as empty.
+  } else {
+    struct stat stat_buffer;
+    if (fstat(fd_, &stat_buffer) != 0) {
+      return false;  // Cannot stat the fd: report it as non-empty.
+    }
+    return stat_buffer.st_size == 0;
+  }
+}
+
 // TODO(calin): fail fast if the dex checksums don't match.
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
-      int fd, std::string* error, bool merge_classes) {
+      int32_t fd, std::string* error, bool merge_classes) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  struct stat stat_buffer;
-  if (fstat(fd, &stat_buffer) != 0) {
-    return kProfileLoadIOError;
+  std::unique_ptr<ProfileSource> source;
+  ProfileLoadSatus status = OpenSource(fd, &source, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
   }
+
   // We allow empty profile files.
   // Profiles may be created by ActivityManager or installd before we manage to
   // process them in the runtime or profman.
-  if (stat_buffer.st_size == 0) {
+  if (source->HasEmptyContent()) {
     return kProfileLoadSuccess;
   }
+
   // Read profile header: magic + version + number_of_dex_files.
   uint8_t number_of_dex_files;
   uint32_t uncompressed_data_size;
   uint32_t compressed_data_size;
-  ProfileLoadSatus status = ReadProfileHeader(fd,
-                                              &number_of_dex_files,
-                                              &uncompressed_data_size,
-                                              &compressed_data_size,
-                                              error);
+  status = ReadProfileHeader(*source,
+                             &number_of_dex_files,
+                             &uncompressed_data_size,
+                             &compressed_data_size,
+                             error);
 
   if (status != kProfileLoadSuccess) {
     return status;
@@ -1192,16 +1290,14 @@
   }
 
   std::unique_ptr<uint8_t[]> compressed_data(new uint8_t[compressed_data_size]);
-  bool bytes_read_success =
-      android::base::ReadFully(fd, compressed_data.get(), compressed_data_size);
-
-  if (testEOF(fd) != 0) {
-    *error += "Unexpected data in the profile file.";
-    return kProfileLoadBadData;
+  status = source->Read(compressed_data.get(), compressed_data_size, "ReadContent", error);
+  if (status != kProfileLoadSuccess) {
+    *error += "Unable to read compressed profile data";
+    return status;
   }
 
-  if (!bytes_read_success) {
-    *error += "Unable to read compressed profile data";
+  if (!source->HasConsumedAllData()) {
+    *error += "Unexpected data in the profile file.";
     return kProfileLoadBadData;
   }
 
@@ -1904,4 +2000,34 @@
   return ret;
 }
 
+bool ProfileCompilationInfo::IsProfileFile(int fd) {
+  // First check if it's an empty file as we allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return false;  // Cannot stat the fd: not treated as a profile.
+  }
+
+  if (stat_buffer.st_size == 0) {
+    return true;
+  }
+
+  // The file is not empty. Check if it contains the profile magic.
+  size_t byte_count = sizeof(kProfileMagic);
+  uint8_t buffer[sizeof(kProfileMagic)];
+  if (!android::base::ReadFully(fd, buffer, byte_count)) {
+    return false;  // Note: the file offset is not reset on this path.
+  }
+
+  // Reset the offset to the start of the file so the caller can read the profile.
+  off_t rc =  TEMP_FAILURE_RETRY(lseek(fd, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to reset the offset";
+    return false;
+  }
+
+  return memcmp(buffer, kProfileMagic, byte_count) == 0;
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 7c30dee..5828563 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -28,6 +28,7 @@
 #include "dex/dex_file.h"
 #include "dex/dex_file_types.h"
 #include "method_reference.h"
+#include "mem_map.h"
 #include "safe_map.h"
 #include "type_reference.h"
 
@@ -71,6 +72,8 @@
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
 
+  static const char* kDexMetadataProfileEntry;
+
   // Data structures for encoding the offline representation of inline caches.
   // This is exposed as public in order to make it available to dex2oat compilations
   // (see compiler/optimizing/inliner.cc).
@@ -410,6 +413,9 @@
   // Return all of the class descriptors in the profile for a set of dex files.
   std::unordered_set<std::string> GetClassDescriptors(const std::vector<const DexFile*>& dex_files);
 
+  // Return true if the fd points to a profile file.
+  bool IsProfileFile(int fd);
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadWouldOverwiteData,
@@ -577,6 +583,58 @@
     uint32_t num_method_ids;
   };
 
+  /**
+   * Encapsulate the source of profile data for loading.
+   * The source can be either a plain file or a zip file.
+   * For zip files, the profile entry will be extracted to
+   * the memory map.
+   */
+  class ProfileSource {
+   public:
+    /**
+     * Create a profile source for the given fd. The ownership of the fd
+     * remains with the caller, as this class will not attempt to close it
+     * at any point.
+     */
+    static ProfileSource* Create(int32_t fd) {
+      DCHECK_GT(fd, -1);
+      return new ProfileSource(fd, /*map*/ nullptr);
+    }
+
+    /**
+     * Create a profile source backed by a memory map. The map can be null,
+     * in which case it will be treated as an empty source.
+     */
+    static ProfileSource* Create(std::unique_ptr<MemMap>&& mem_map) {
+      return new ProfileSource(/*fd*/ -1, std::move(mem_map));
+    }
+
+    /**
+     * Read byte_count bytes from this source into buffer.
+     * Reading will advance the current source position so subsequent
+     * invocations will continue from the last position.
+     */
+    ProfileLoadSatus Read(uint8_t* buffer,
+                          size_t byte_count,
+                          const std::string& debug_stage,
+                          std::string* error);
+
+    /** Return true if the source has no data (empty file or null/empty map). */
+    bool HasEmptyContent() const;
+    /** Return true if all the information from this source has been read. */
+    bool HasConsumedAllData() const;
+
+   private:
+    ProfileSource(int32_t fd, std::unique_ptr<MemMap>&& mem_map)
+        : fd_(fd), mem_map_(std::move(mem_map)), mem_map_cur_(0) {}
+
+    bool IsMemMap() const { return fd_ == -1; }  // An fd of -1 marks a map-backed source.
+
+    int32_t fd_;  // The fd is not owned by this class.
+    std::unique_ptr<MemMap> mem_map_;  // May be null (empty source) when map-backed.
+    size_t mem_map_cur_;  // Current position in the map to read from.
+  };
+
   // A helper structure to make sure we don't read past our buffers in the loops.
   struct SafeBuffer {
    public:
@@ -586,13 +644,9 @@
     }
 
     // Reads the content of the descriptor at the current position.
-    ProfileLoadSatus FillFromFd(int fd,
-                                const std::string& source,
-                                /*out*/std::string* error);
-
-    ProfileLoadSatus FillFromBuffer(uint8_t* buffer_ptr,
-                                    const std::string& source,
-                                    /*out*/std::string* error);
+    ProfileLoadSatus Fill(ProfileSource& source,
+                          const std::string& debug_stage,
+                          /*out*/std::string* error);
 
     // Reads an uint value (high bits to low bits) and advances the current pointer
     // with the number of bits read.
@@ -620,12 +674,18 @@
     uint8_t* ptr_current_;
   };
 
+  ProfileLoadSatus OpenSource(int32_t fd,
+                              /*out*/ std::unique_ptr<ProfileSource>* source,
+                              /*out*/ std::string* error);
+
   // Entry point for profile loding functionality.
-  ProfileLoadSatus LoadInternal(int fd, std::string* error, bool merge_classes = true);
+  ProfileLoadSatus LoadInternal(int32_t fd,
+                                std::string* error,
+                                bool merge_classes = true);
 
   // Read the profile header from the given fd and store the number of profile
   // lines into number_of_dex_files.
-  ProfileLoadSatus ReadProfileHeader(int fd,
+  ProfileLoadSatus ReadProfileHeader(ProfileSource& source,
                                      /*out*/uint8_t* number_of_dex_files,
                                      /*out*/uint32_t* size_uncompressed_data,
                                      /*out*/uint32_t* size_compressed_data,
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 08042cc..6ce9bcb 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <stdio.h>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -29,6 +30,7 @@
 #include "mirror/class_loader.h"
 #include "scoped_thread_state_change-inl.h"
 #include "type_reference.h"
+#include "ziparchive/zip_writer.h"
 
 namespace art {
 
@@ -268,6 +270,50 @@
     }
   }
 
+  // Zips a freshly saved profile under the name zip_entry (using zip_flags) and checks that
+  // loading the archive succeeds or fails as requested.
+  void TestProfileLoadFromZip(const char* zip_entry,
+                              size_t zip_flags,
+                              bool should_succeed,
+                              bool should_succeed_with_empty_profile = false) {
+    // Create a valid profile.
+    ScratchFile profile;
+    ProfileCompilationInfo saved_info;
+    for (uint16_t i = 0; i < 10; i++) {
+      ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+      ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    }
+    ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+
+    // Prepare the profile content for zipping (array form so delete[] is used).
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+    uint64_t data_size = profile.GetFile()->GetLength();
+    std::unique_ptr<uint8_t[]> data(new uint8_t[data_size]);
+    ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+    // Zip the profile content.
+    ScratchFile zip;
+    FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+    ASSERT_TRUE(file != nullptr);
+    ZipWriter writer(file);
+    writer.StartEntry(zip_entry, zip_flags);
+    writer.WriteBytes(data.get(), data_size);
+    writer.FinishEntry();
+    writer.Finish();
+    fclose(file);  // fclose() flushes the stream, so no separate fflush() is needed.
+
+    // Verify loading from the zip archive.
+    ProfileCompilationInfo loaded_info;
+    ASSERT_TRUE(zip.GetFile()->ResetOffset());
+    ASSERT_EQ(should_succeed, loaded_info.Load(zip.GetFile()->GetPath(), false));
+    if (should_succeed && should_succeed_with_empty_profile) {
+      ASSERT_TRUE(loaded_info.IsEmpty());
+    } else if (should_succeed) {
+      ASSERT_TRUE(loaded_info.Equals(saved_info));
+    }
+  }
+
   // Cannot sizeof the actual arrays so hard code the values here.
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
@@ -934,4 +980,64 @@
   }
 }
 
+TEST_F(ProfileCompilationInfoTest, LoadFromZipCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kCompress | ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnAligned) {
+  TestProfileLoadFromZip("primary.prof",
+                         0,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadZipEntry) {
+  TestProfileLoadFromZip("invalid.profile.entry",
+                         0,
+                         /*should_succeed*/true,  // A missing entry yields an empty profile.
+                         /*should_succeed_with_empty_profile*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadProfile) {
+  // Create a bad profile: a valid magic and version followed by a truncated header.
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Prepare the profile content for zipping (array form so delete[] is used).
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  uint64_t data_size = profile.GetFile()->GetLength();
+  std::unique_ptr<uint8_t[]> data(new uint8_t[data_size]);
+  ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+  // Zip the profile content.
+  ScratchFile zip;
+  FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+  ASSERT_TRUE(file != nullptr);
+  ZipWriter writer(file);
+  writer.StartEntry("primary.prof", ZipWriter::kCompress | ZipWriter::kAlign32);
+  writer.WriteBytes(data.get(), data_size);
+  writer.FinishEntry();
+  writer.Finish();
+  fclose(file);  // fclose() flushes the stream, so no separate fflush() is needed.
+
+  // Check that we failed to load.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(zip.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(zip)));
+}
+
 }  // namespace art