Use libziparchive for art zip processing.

This is part of the effort to move all VM & framework
zip parsing to a common implementation. This also has
the side effect of fixing various TODOs related to
crc32 checking.

bug: 10193060

Change-Id: I407f9ad5a94fc91d96ff43556adde00a00df1f14
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index db273ec..8cb1993 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -30,272 +30,23 @@
 
 namespace art {
 
-static const size_t kBufSize = 32 * KB;
-
-// Get 2 little-endian bytes.
-static uint32_t Le16ToHost(const byte* src) {
-  return ((src[0] <<  0) |
-          (src[1] <<  8));
-}
-
-// Get 4 little-endian bytes.
-static uint32_t Le32ToHost(const byte* src) {
-  return ((src[0] <<  0) |
-          (src[1] <<  8) |
-          (src[2] << 16) |
-          (src[3] << 24));
-}
-
-uint16_t ZipEntry::GetCompressionMethod() {
-  return Le16ToHost(ptr_ + ZipArchive::kCDEMethod);
-}
-
-uint32_t ZipEntry::GetCompressedLength() {
-  return Le32ToHost(ptr_ + ZipArchive::kCDECompLen);
-}
-
 uint32_t ZipEntry::GetUncompressedLength() {
-  return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen);
+  return zip_entry_->uncompressed_length;
 }
 
 uint32_t ZipEntry::GetCrc32() {
-  return Le32ToHost(ptr_ + ZipArchive::kCDECRC);
+  return zip_entry_->crc32;
 }
 
-off64_t ZipEntry::GetDataOffset() {
-  // All we have is the offset to the Local File Header, which is
-  // variable size, so we have to read the contents of the struct to
-  // figure out where the actual data starts.
-
-  // We also need to make sure that the lengths are not so large that
-  // somebody trying to map the compressed or uncompressed data runs
-  // off the end of the mapped region.
-
-  off64_t dir_offset = zip_archive_->dir_offset_;
-  int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset);
-  if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) {
-    LOG(WARNING) << "Zip: bad LFH offset in zip";
-    return -1;
-  }
-
-  if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) {
-    PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset;
-    return -1;
-  }
-
-  uint8_t lfh_buf[ZipArchive::kLFHLen];
-  ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf)));
-  if (actual != sizeof(lfh_buf)) {
-    LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset;
-    return -1;
-  }
-
-  if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) {
-    LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset;
-    return -1;
-  }
-
-  uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags);
-  if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) {
-    LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf;
-    return -1;
-  }
-
-  off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen
-                       + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen)
-                       + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen));
-  if (data_offset >= dir_offset) {
-    LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip";
-    return -1;
-  }
-
-  // check lengths
-
-  if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) {
-    LOG(WARNING) << "Zip: bad compressed length in zip "
-                 << "(" << data_offset << " + " << GetCompressedLength()
-                 << " > " << dir_offset << ")";
-    return -1;
-  }
-
-  if (GetCompressionMethod() == kCompressStored
-      && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) {
-    LOG(WARNING) << "Zip: bad uncompressed length in zip "
-                 << "(" << data_offset << " + " << GetUncompressedLength()
-                 << " > " << dir_offset << ")";
-    return -1;
-  }
-
-  return data_offset;
-}
-
-static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) {
-  uint8_t* dst = begin;
-  std::vector<uint8_t> buf(kBufSize);
-  while (count != 0) {
-    size_t bytes_to_read = (count > kBufSize) ? kBufSize : count;
-    ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read));
-    if (actual != static_cast<ssize_t>(bytes_to_read)) {
-      PLOG(WARNING) << "Zip: short read";
-      return false;
-    }
-    memcpy(dst, &buf[0], bytes_to_read);
-    dst += bytes_to_read;
-    count -= bytes_to_read;
-  }
-  DCHECK_EQ(dst, begin + size);
-  return true;
-}
-
-class ZStream {
- public:
-  ZStream(byte* write_buf, size_t write_buf_size) {
-    // Initialize the zlib stream struct.
-    memset(&zstream_, 0, sizeof(zstream_));
-    zstream_.zalloc = Z_NULL;
-    zstream_.zfree = Z_NULL;
-    zstream_.opaque = Z_NULL;
-    zstream_.next_in = NULL;
-    zstream_.avail_in = 0;
-    zstream_.next_out = reinterpret_cast<Bytef*>(write_buf);
-    zstream_.avail_out = write_buf_size;
-    zstream_.data_type = Z_UNKNOWN;
-  }
-
-  z_stream& Get() {
-    return zstream_;
-  }
-
-  ~ZStream() {
-    inflateEnd(&zstream_);
-  }
- private:
-  z_stream zstream_;
-};
-
-static bool InflateToMemory(uint8_t* begin, size_t size,
-                            int in, size_t uncompressed_length, size_t compressed_length) {
-  uint8_t* dst = begin;
-  UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]);
-  UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]);
-  if (read_buf.get() == NULL || write_buf.get() == NULL) {
-    LOG(WARNING) << "Zip: failed to allocate buffer to inflate";
-    return false;
-  }
-
-  UniquePtr<ZStream> zstream(new ZStream(write_buf.get(), kBufSize));
-
-  // Use the undocumented "negative window bits" feature to tell zlib
-  // that there's no zlib header waiting for it.
-  int zerr = inflateInit2(&zstream->Get(), -MAX_WBITS);
-  if (zerr != Z_OK) {
-    if (zerr == Z_VERSION_ERROR) {
-      LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")";
-    } else {
-      LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")";
-    }
-    return false;
-  }
-
-  size_t remaining = compressed_length;
-  do {
-    // read as much as we can
-    if (zstream->Get().avail_in == 0) {
-      size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining;
-
-        ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read));
-        if (actual != static_cast<ssize_t>(bytes_to_read)) {
-          LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")";
-          return false;
-        }
-        remaining -= bytes_to_read;
-        zstream->Get().next_in = read_buf.get();
-        zstream->Get().avail_in = bytes_to_read;
-    }
-
-    // uncompress the data
-    zerr = inflate(&zstream->Get(), Z_NO_FLUSH);
-    if (zerr != Z_OK && zerr != Z_STREAM_END) {
-      LOG(WARNING) << "Zip: inflate zerr=" << zerr
-                   << " (next_in=" << zstream->Get().next_in
-                   << " avail_in=" << zstream->Get().avail_in
-                   << " next_out=" << zstream->Get().next_out
-                   << " avail_out=" << zstream->Get().avail_out
-                   << ")";
-      return false;
-    }
-
-    // write when we're full or when we're done
-    if (zstream->Get().avail_out == 0 ||
-        (zerr == Z_STREAM_END && zstream->Get().avail_out != kBufSize)) {
-      size_t bytes_to_write = zstream->Get().next_out - write_buf.get();
-      memcpy(dst, write_buf.get(), bytes_to_write);
-      dst += bytes_to_write;
-      zstream->Get().next_out = write_buf.get();
-      zstream->Get().avail_out = kBufSize;
-    }
-  } while (zerr == Z_OK);
-
-  DCHECK_EQ(zerr, Z_STREAM_END);  // other errors should've been caught
-
-  // paranoia
-  if (zstream->Get().total_out != uncompressed_length) {
-    LOG(WARNING) << "Zip: size mismatch on inflated file ("
-                 << zstream->Get().total_out << " vs " << uncompressed_length << ")";
-    return false;
-  }
-
-  DCHECK_EQ(dst, begin + size);
-  return true;
-}
 
 bool ZipEntry::ExtractToFile(File& file, std::string* error_msg) {
-  uint32_t length = GetUncompressedLength();
-  int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length));
-  if (result == -1) {
-    *error_msg = StringPrintf("Zip: failed to ftruncate '%s' to length %ud", file.GetPath().c_str(),
-                              length);
+  const int32_t error = ExtractEntryToFile(handle_, zip_entry_, file.Fd());
+  if (error) {
+    *error_msg = std::string(ErrorCodeString(error));
     return false;
   }
 
-  UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0,
-                                        file.GetPath().c_str(), error_msg));
-  if (map.get() == NULL) {
-    *error_msg = StringPrintf("Zip: failed to mmap space for '%s': %s", file.GetPath().c_str(),
-                              error_msg->c_str());
-    return false;
-  }
-
-  return ExtractToMemory(map->Begin(), map->Size(), error_msg);
-}
-
-bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size, std::string* error_msg) {
-  // If size is zero, data offset will be meaningless, so bail out early.
-  if (size == 0) {
-    return true;
-  }
-  off64_t data_offset = GetDataOffset();
-  if (data_offset == -1) {
-    *error_msg = StringPrintf("Zip: data_offset=%lld", data_offset);
-    return false;
-  }
-  if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) {
-    *error_msg = StringPrintf("Zip: lseek to data at %lld failed", data_offset);
-    return false;
-  }
-
-  // TODO: this doesn't verify the data's CRC, but probably should (especially
-  // for uncompressed data).
-  switch (GetCompressionMethod()) {
-    case kCompressStored:
-      return CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength());
-    case kCompressDeflated:
-      return InflateToMemory(begin, size, zip_archive_->fd_,
-                             GetUncompressedLength(), GetCompressedLength());
-    default:
-      *error_msg = StringPrintf("Zip: unknown compression method 0x%x", GetCompressionMethod());
-      return false;
-  }
+  return true;
 }
 
 MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename, std::string* error_msg) {
@@ -303,18 +54,18 @@
   name += " extracted in memory from ";
   name += entry_filename;
   UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
-                                             NULL,
-                                             GetUncompressedLength(),
+                                             NULL, GetUncompressedLength(),
                                              PROT_READ | PROT_WRITE, error_msg));
   if (map.get() == nullptr) {
     DCHECK(!error_msg->empty());
-    return NULL;
+    return nullptr;
   }
 
-  bool success = ExtractToMemory(map->Begin(), map->Size(), error_msg);
-  if (!success) {
-    LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory";
-    return NULL;
+  const int32_t error = ExtractToMemory(handle_, zip_entry_,
+                                        map->Begin(), map->Size());
+  if (error) {
+    *error_msg = std::string(ErrorCodeString(error));
+    return nullptr;
   }
 
   return map.release();
@@ -336,238 +87,47 @@
 
 ZipArchive* ZipArchive::Open(const char* filename, std::string* error_msg) {
   DCHECK(filename != nullptr);
-  int fd = open(filename, O_RDONLY, 0);
-  if (fd == -1) {
-    *error_msg = StringPrintf("Zip: unable to open '%s': %s", filename, strerror(errno));
-    return NULL;
+
+  ZipArchiveHandle handle;
+  const int32_t error = OpenArchive(filename, &handle);
+  if (error) {
+    *error_msg = std::string(ErrorCodeString(error));
+    CloseArchive(handle);
+    return nullptr;
   }
-  return OpenFromFd(fd, filename, error_msg);
+
+  SetCloseOnExec(GetFileDescriptor(handle));
+  return new ZipArchive(handle);
 }
 
 ZipArchive* ZipArchive::OpenFromFd(int fd, const char* filename, std::string* error_msg) {
-  SetCloseOnExec(fd);
-  UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd, filename));
-  CHECK(zip_archive.get() != nullptr);
-  if (!zip_archive->MapCentralDirectory(error_msg)) {
-      zip_archive->Close();
-      return NULL;
+  DCHECK(filename != nullptr);
+  DCHECK_GT(fd, 0);
+
+  ZipArchiveHandle handle;
+  const int32_t error = OpenArchiveFd(fd, filename, &handle);
+  if (error) {
+    *error_msg = std::string(ErrorCodeString(error));
+    CloseArchive(handle);
+    return nullptr;
   }
-  if (!zip_archive->Parse(error_msg)) {
-      zip_archive->Close();
-      return NULL;
-  }
-  return zip_archive.release();
+
+  SetCloseOnExec(GetFileDescriptor(handle));
+  return new ZipArchive(handle);
 }
 
-ZipEntry* ZipArchive::Find(const char* name) const {
-  DCHECK(name != NULL);
-  DirEntries::const_iterator it = dir_entries_.find(name);
-  if (it == dir_entries_.end()) {
-    return NULL;
-  }
-  return new ZipEntry(this, (*it).second);
-}
+ZipEntry* ZipArchive::Find(const char* name, std::string* error_msg) const {
+  DCHECK(name != nullptr);
 
-void ZipArchive::Close() {
-  if (fd_ != -1) {
-    close(fd_);
-  }
-  fd_ = -1;
-  num_entries_ = 0;
-  dir_offset_ = 0;
-}
-
-std::string ZipArchive::ErrorStringPrintf(const char* fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  std::string result(StringPrintf("Zip '%s' : ", filename_.c_str()));
-  StringAppendV(&result, fmt, ap);
-  va_end(ap);
-  return result;
-}
-
-// Find the zip Central Directory and memory-map it.
-//
-// On success, returns true after populating fields from the EOCD area:
-//   num_entries_
-//   dir_offset_
-//   dir_map_
-bool ZipArchive::MapCentralDirectory(std::string* error_msg) {
-  /*
-   * Get and test file length.
-   */
-  off64_t file_length = lseek64(fd_, 0, SEEK_END);
-  if (file_length < kEOCDLen) {
-    *error_msg = ErrorStringPrintf("length %lld is too small to be zip", file_length);
-    return false;
+  // Resist the urge to delete the space: "<:" is a digraph for "[".
+  UniquePtr< ::ZipEntry> zip_entry(new ::ZipEntry);
+  const int32_t error = FindEntry(handle_, name, zip_entry.get());
+  if (error) {
+    *error_msg = std::string(ErrorCodeString(error));
+    return nullptr;
   }
 
-  size_t read_amount = kMaxEOCDSearch;
-  if (file_length < off64_t(read_amount)) {
-    read_amount = file_length;
-  }
-
-  UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]);
-  CHECK(scan_buf.get() != nullptr);
-
-  /*
-   * Make sure this is a Zip archive.
-   */
-  if (lseek64(fd_, 0, SEEK_SET) != 0) {
-    *error_msg = ErrorStringPrintf("seek to start failed: %s", strerror(errno));
-    return false;
-  }
-
-  ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t)));
-  if (actual != static_cast<ssize_t>(sizeof(int32_t))) {
-    *error_msg = ErrorStringPrintf("couldn\'t read first signature from zip archive: %s",
-                                   strerror(errno));
-    return false;
-  }
-
-  unsigned int header = Le32ToHost(scan_buf.get());
-  if (header != kLFHSignature) {
-    *error_msg = ErrorStringPrintf("not a zip archive (found 0x%x)", header);
-    return false;
-  }
-
-  // Perform the traditional EOCD snipe hunt.
-  //
-  // We're searching for the End of Central Directory magic number,
-  // which appears at the start of the EOCD block.  It's followed by
-  // 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
-  // need to read the last part of the file into a buffer, dig through
-  // it to find the magic number, parse some values out, and use those
-  // to determine the extent of the CD.
-  //
-  // We start by pulling in the last part of the file.
-  off64_t search_start = file_length - read_amount;
-
-  if (lseek64(fd_, search_start, SEEK_SET) != search_start) {
-    *error_msg = ErrorStringPrintf("seek %lld failed: %s", search_start, strerror(errno));
-    return false;
-  }
-  actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount));
-  if (actual != static_cast<ssize_t>(read_amount)) {
-    *error_msg = ErrorStringPrintf("read %lld, expected %zd. %s", search_start, read_amount,
-                                   strerror(errno));
-    return false;
-  }
-
-
-  // Scan backward for the EOCD magic.  In an archive without a trailing
-  // comment, we'll find it on the first try.  (We may want to consider
-  // doing an initial minimal read; if we don't find it, retry with a
-  // second read as above.)
-  int i;
-  for (i = read_amount - kEOCDLen; i >= 0; i--) {
-    if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) {
-      break;
-    }
-  }
-  if (i < 0) {
-    *error_msg = ErrorStringPrintf("EOCD not found, not a zip file");
-    return false;
-  }
-
-  off64_t eocd_offset = search_start + i;
-  const byte* eocd_ptr = scan_buf.get() + i;
-
-  CHECK(eocd_offset < file_length);
-
-  // Grab the CD offset and size, and the number of entries in the
-  // archive.  Verify that they look reasonable.
-  uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber);
-  uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD);
-  uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries);
-  uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries);
-  uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize);
-  uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset);
-  uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize);
-
-  if ((uint64_t) dir_offset + (uint64_t) dir_size > (uint64_t) eocd_offset) {
-    *error_msg = ErrorStringPrintf("bad offsets (dir=%ud, size=%ud, eocd=%lld)",
-                                   dir_offset, dir_size, eocd_offset);
-    return false;
-  }
-  if (num_entries == 0) {
-    *error_msg = ErrorStringPrintf("empty archive?");
-    return false;
-  } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) {
-    *error_msg = ErrorStringPrintf("spanned archives not supported");
-    return false;
-  }
-
-  // Check to see if comment is a sane size
-  if ((comment_size > (file_length - kEOCDLen))
-      || (eocd_offset > (file_length - kEOCDLen) - comment_size)) {
-    *error_msg = ErrorStringPrintf("comment size runs off end of file");
-    return false;
-  }
-
-  // It all looks good.  Create a mapping for the CD.
-  dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset,
-                                 filename_.c_str(), error_msg));
-  if (dir_map_.get() == NULL) {
-    return false;
-  }
-
-  num_entries_ = num_entries;
-  dir_offset_ = dir_offset;
-  return true;
-}
-
-bool ZipArchive::Parse(std::string* error_msg) {
-  const byte* cd_ptr = dir_map_->Begin();
-  size_t cd_length = dir_map_->Size();
-
-  // Walk through the central directory, adding entries to the hash
-  // table and verifying values.
-  const byte* ptr = cd_ptr;
-  for (int i = 0; i < num_entries_; i++) {
-    if (Le32ToHost(ptr) != kCDESignature) {
-      *error_msg = ErrorStringPrintf("missed a central dir sig (at %d)", i);
-      return false;
-    }
-    if (ptr + kCDELen > cd_ptr + cd_length) {
-      *error_msg = ErrorStringPrintf("ran off the end (at %d)", i);
-      return false;
-    }
-
-    int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset);
-    if (local_hdr_offset >= dir_offset_) {
-      *error_msg = ErrorStringPrintf("bad LFH offset %lld at entry %d", local_hdr_offset, i);
-      return false;
-    }
-
-    uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags);
-    if ((gpbf & kGPFUnsupportedMask) != 0) {
-      *error_msg = ErrorStringPrintf("invalid general purpose bit flag %x", gpbf);
-      return false;
-    }
-
-    uint16_t name_len = Le16ToHost(ptr + kCDENameLen);
-    uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen);
-    uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen);
-
-    // add the CDE filename to the hash table
-    const char* name = reinterpret_cast<const char*>(ptr + kCDELen);
-
-    // Check name for NULL characters
-    if (memchr(name, 0, name_len) != NULL) {
-      *error_msg = ErrorStringPrintf("filename contains NUL byte");
-      return false;
-    }
-
-    dir_entries_.Put(StringPiece(name, name_len), ptr);
-    ptr += kCDELen + name_len + extra_len + comment_len;
-    if (ptr > cd_ptr + cd_length) {
-      *error_msg = ErrorStringPrintf("bad CD advance (%p vs %p) at entry %d",
-                                     ptr, cd_ptr + cd_length, i);
-      return false;
-    }
-  }
-  return true;
+  return new ZipEntry(handle_, zip_entry.release());
 }
 
 }  // namespace art