Use libziparchive for art zip processing.
This is part of the effort to move all VM & framework
zip parsing to a common implementation. As a side effect,
it also resolves various TODOs about verifying CRC32
checksums when extracting entries.
bug: 10193060
Change-Id: I407f9ad5a94fc91d96ff43556adde00a00df1f14
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index db273ec..8cb1993 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -30,272 +30,23 @@
namespace art {
-static const size_t kBufSize = 32 * KB;
-
-// Get 2 little-endian bytes.
-static uint32_t Le16ToHost(const byte* src) {
- return ((src[0] << 0) |
- (src[1] << 8));
-}
-
-// Get 4 little-endian bytes.
-static uint32_t Le32ToHost(const byte* src) {
- return ((src[0] << 0) |
- (src[1] << 8) |
- (src[2] << 16) |
- (src[3] << 24));
-}
-
-uint16_t ZipEntry::GetCompressionMethod() {
- return Le16ToHost(ptr_ + ZipArchive::kCDEMethod);
-}
-
-uint32_t ZipEntry::GetCompressedLength() {
- return Le32ToHost(ptr_ + ZipArchive::kCDECompLen);
-}
-
uint32_t ZipEntry::GetUncompressedLength() {
- return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen);
+ return zip_entry_->uncompressed_length;
}
uint32_t ZipEntry::GetCrc32() {
- return Le32ToHost(ptr_ + ZipArchive::kCDECRC);
+ return zip_entry_->crc32;
}
-off64_t ZipEntry::GetDataOffset() {
- // All we have is the offset to the Local File Header, which is
- // variable size, so we have to read the contents of the struct to
- // figure out where the actual data starts.
-
- // We also need to make sure that the lengths are not so large that
- // somebody trying to map the compressed or uncompressed data runs
- // off the end of the mapped region.
-
- off64_t dir_offset = zip_archive_->dir_offset_;
- int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset);
- if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) {
- LOG(WARNING) << "Zip: bad LFH offset in zip";
- return -1;
- }
-
- if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) {
- PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset;
- return -1;
- }
-
- uint8_t lfh_buf[ZipArchive::kLFHLen];
- ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf)));
- if (actual != sizeof(lfh_buf)) {
- LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset;
- return -1;
- }
-
- if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) {
- LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset;
- return -1;
- }
-
- uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags);
- if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) {
- LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf;
- return -1;
- }
-
- off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen
- + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen)
- + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen));
- if (data_offset >= dir_offset) {
- LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip";
- return -1;
- }
-
- // check lengths
-
- if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) {
- LOG(WARNING) << "Zip: bad compressed length in zip "
- << "(" << data_offset << " + " << GetCompressedLength()
- << " > " << dir_offset << ")";
- return -1;
- }
-
- if (GetCompressionMethod() == kCompressStored
- && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) {
- LOG(WARNING) << "Zip: bad uncompressed length in zip "
- << "(" << data_offset << " + " << GetUncompressedLength()
- << " > " << dir_offset << ")";
- return -1;
- }
-
- return data_offset;
-}
-
-static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) {
- uint8_t* dst = begin;
- std::vector<uint8_t> buf(kBufSize);
- while (count != 0) {
- size_t bytes_to_read = (count > kBufSize) ? kBufSize : count;
- ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read));
- if (actual != static_cast<ssize_t>(bytes_to_read)) {
- PLOG(WARNING) << "Zip: short read";
- return false;
- }
- memcpy(dst, &buf[0], bytes_to_read);
- dst += bytes_to_read;
- count -= bytes_to_read;
- }
- DCHECK_EQ(dst, begin + size);
- return true;
-}
-
-class ZStream {
- public:
- ZStream(byte* write_buf, size_t write_buf_size) {
- // Initialize the zlib stream struct.
- memset(&zstream_, 0, sizeof(zstream_));
- zstream_.zalloc = Z_NULL;
- zstream_.zfree = Z_NULL;
- zstream_.opaque = Z_NULL;
- zstream_.next_in = NULL;
- zstream_.avail_in = 0;
- zstream_.next_out = reinterpret_cast<Bytef*>(write_buf);
- zstream_.avail_out = write_buf_size;
- zstream_.data_type = Z_UNKNOWN;
- }
-
- z_stream& Get() {
- return zstream_;
- }
-
- ~ZStream() {
- inflateEnd(&zstream_);
- }
- private:
- z_stream zstream_;
-};
-
-static bool InflateToMemory(uint8_t* begin, size_t size,
- int in, size_t uncompressed_length, size_t compressed_length) {
- uint8_t* dst = begin;
- UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]);
- UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]);
- if (read_buf.get() == NULL || write_buf.get() == NULL) {
- LOG(WARNING) << "Zip: failed to allocate buffer to inflate";
- return false;
- }
-
- UniquePtr<ZStream> zstream(new ZStream(write_buf.get(), kBufSize));
-
- // Use the undocumented "negative window bits" feature to tell zlib
- // that there's no zlib header waiting for it.
- int zerr = inflateInit2(&zstream->Get(), -MAX_WBITS);
- if (zerr != Z_OK) {
- if (zerr == Z_VERSION_ERROR) {
- LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")";
- } else {
- LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")";
- }
- return false;
- }
-
- size_t remaining = compressed_length;
- do {
- // read as much as we can
- if (zstream->Get().avail_in == 0) {
- size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining;
-
- ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read));
- if (actual != static_cast<ssize_t>(bytes_to_read)) {
- LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")";
- return false;
- }
- remaining -= bytes_to_read;
- zstream->Get().next_in = read_buf.get();
- zstream->Get().avail_in = bytes_to_read;
- }
-
- // uncompress the data
- zerr = inflate(&zstream->Get(), Z_NO_FLUSH);
- if (zerr != Z_OK && zerr != Z_STREAM_END) {
- LOG(WARNING) << "Zip: inflate zerr=" << zerr
- << " (next_in=" << zstream->Get().next_in
- << " avail_in=" << zstream->Get().avail_in
- << " next_out=" << zstream->Get().next_out
- << " avail_out=" << zstream->Get().avail_out
- << ")";
- return false;
- }
-
- // write when we're full or when we're done
- if (zstream->Get().avail_out == 0 ||
- (zerr == Z_STREAM_END && zstream->Get().avail_out != kBufSize)) {
- size_t bytes_to_write = zstream->Get().next_out - write_buf.get();
- memcpy(dst, write_buf.get(), bytes_to_write);
- dst += bytes_to_write;
- zstream->Get().next_out = write_buf.get();
- zstream->Get().avail_out = kBufSize;
- }
- } while (zerr == Z_OK);
-
- DCHECK_EQ(zerr, Z_STREAM_END); // other errors should've been caught
-
- // paranoia
- if (zstream->Get().total_out != uncompressed_length) {
- LOG(WARNING) << "Zip: size mismatch on inflated file ("
- << zstream->Get().total_out << " vs " << uncompressed_length << ")";
- return false;
- }
-
- DCHECK_EQ(dst, begin + size);
- return true;
-}
bool ZipEntry::ExtractToFile(File& file, std::string* error_msg) {
- uint32_t length = GetUncompressedLength();
- int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length));
- if (result == -1) {
- *error_msg = StringPrintf("Zip: failed to ftruncate '%s' to length %ud", file.GetPath().c_str(),
- length);
+ const int32_t error = ExtractEntryToFile(handle_, zip_entry_, file.Fd());
+ if (error) {
+ *error_msg = std::string(ErrorCodeString(error));
return false;
}
- UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0,
- file.GetPath().c_str(), error_msg));
- if (map.get() == NULL) {
- *error_msg = StringPrintf("Zip: failed to mmap space for '%s': %s", file.GetPath().c_str(),
- error_msg->c_str());
- return false;
- }
-
- return ExtractToMemory(map->Begin(), map->Size(), error_msg);
-}
-
-bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size, std::string* error_msg) {
- // If size is zero, data offset will be meaningless, so bail out early.
- if (size == 0) {
- return true;
- }
- off64_t data_offset = GetDataOffset();
- if (data_offset == -1) {
- *error_msg = StringPrintf("Zip: data_offset=%lld", data_offset);
- return false;
- }
- if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) {
- *error_msg = StringPrintf("Zip: lseek to data at %lld failed", data_offset);
- return false;
- }
-
- // TODO: this doesn't verify the data's CRC, but probably should (especially
- // for uncompressed data).
- switch (GetCompressionMethod()) {
- case kCompressStored:
- return CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength());
- case kCompressDeflated:
- return InflateToMemory(begin, size, zip_archive_->fd_,
- GetUncompressedLength(), GetCompressedLength());
- default:
- *error_msg = StringPrintf("Zip: unknown compression method 0x%x", GetCompressionMethod());
- return false;
- }
+ return true;
}
MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename, std::string* error_msg) {
@@ -303,18 +54,18 @@
name += " extracted in memory from ";
name += entry_filename;
UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
- NULL,
- GetUncompressedLength(),
+ NULL, GetUncompressedLength(),
PROT_READ | PROT_WRITE, error_msg));
if (map.get() == nullptr) {
DCHECK(!error_msg->empty());
- return NULL;
+ return nullptr;
}
- bool success = ExtractToMemory(map->Begin(), map->Size(), error_msg);
- if (!success) {
- LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory";
- return NULL;
+ const int32_t error = ExtractToMemory(handle_, zip_entry_,
+ map->Begin(), map->Size());
+ if (error) {
+ *error_msg = std::string(ErrorCodeString(error));
+ return nullptr;
}
return map.release();
@@ -336,238 +87,47 @@
ZipArchive* ZipArchive::Open(const char* filename, std::string* error_msg) {
DCHECK(filename != nullptr);
- int fd = open(filename, O_RDONLY, 0);
- if (fd == -1) {
- *error_msg = StringPrintf("Zip: unable to open '%s': %s", filename, strerror(errno));
- return NULL;
+
+ ZipArchiveHandle handle;
+ const int32_t error = OpenArchive(filename, &handle);
+ if (error) {
+ *error_msg = std::string(ErrorCodeString(error));
+ CloseArchive(handle);
+ return nullptr;
}
- return OpenFromFd(fd, filename, error_msg);
+
+ SetCloseOnExec(GetFileDescriptor(handle));
+ return new ZipArchive(handle);
}
ZipArchive* ZipArchive::OpenFromFd(int fd, const char* filename, std::string* error_msg) {
- SetCloseOnExec(fd);
- UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd, filename));
- CHECK(zip_archive.get() != nullptr);
- if (!zip_archive->MapCentralDirectory(error_msg)) {
- zip_archive->Close();
- return NULL;
+ DCHECK(filename != nullptr);
+ DCHECK_GT(fd, 0);
+
+ ZipArchiveHandle handle;
+ const int32_t error = OpenArchiveFd(fd, filename, &handle);
+ if (error) {
+ *error_msg = std::string(ErrorCodeString(error));
+ CloseArchive(handle);
+ return nullptr;
}
- if (!zip_archive->Parse(error_msg)) {
- zip_archive->Close();
- return NULL;
- }
- return zip_archive.release();
+
+ SetCloseOnExec(GetFileDescriptor(handle));
+ return new ZipArchive(handle);
}
-ZipEntry* ZipArchive::Find(const char* name) const {
- DCHECK(name != NULL);
- DirEntries::const_iterator it = dir_entries_.find(name);
- if (it == dir_entries_.end()) {
- return NULL;
- }
- return new ZipEntry(this, (*it).second);
-}
+ZipEntry* ZipArchive::Find(const char* name, std::string* error_msg) const {
+ DCHECK(name != nullptr);
-void ZipArchive::Close() {
- if (fd_ != -1) {
- close(fd_);
- }
- fd_ = -1;
- num_entries_ = 0;
- dir_offset_ = 0;
-}
-
-std::string ZipArchive::ErrorStringPrintf(const char* fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- std::string result(StringPrintf("Zip '%s' : ", filename_.c_str()));
- StringAppendV(&result, fmt, ap);
- va_end(ap);
- return result;
-}
-
-// Find the zip Central Directory and memory-map it.
-//
-// On success, returns true after populating fields from the EOCD area:
-// num_entries_
-// dir_offset_
-// dir_map_
-bool ZipArchive::MapCentralDirectory(std::string* error_msg) {
- /*
- * Get and test file length.
- */
- off64_t file_length = lseek64(fd_, 0, SEEK_END);
- if (file_length < kEOCDLen) {
- *error_msg = ErrorStringPrintf("length %lld is too small to be zip", file_length);
- return false;
+ // Resist the urge to delete the space. <: is a digraph sequence.
+ UniquePtr< ::ZipEntry> zip_entry(new ::ZipEntry);
+ const int32_t error = FindEntry(handle_, name, zip_entry.get());
+ if (error) {
+ *error_msg = std::string(ErrorCodeString(error));
+ return nullptr;
}
- size_t read_amount = kMaxEOCDSearch;
- if (file_length < off64_t(read_amount)) {
- read_amount = file_length;
- }
-
- UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]);
- CHECK(scan_buf.get() != nullptr);
-
- /*
- * Make sure this is a Zip archive.
- */
- if (lseek64(fd_, 0, SEEK_SET) != 0) {
- *error_msg = ErrorStringPrintf("seek to start failed: %s", strerror(errno));
- return false;
- }
-
- ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t)));
- if (actual != static_cast<ssize_t>(sizeof(int32_t))) {
- *error_msg = ErrorStringPrintf("couldn\'t read first signature from zip archive: %s",
- strerror(errno));
- return false;
- }
-
- unsigned int header = Le32ToHost(scan_buf.get());
- if (header != kLFHSignature) {
- *error_msg = ErrorStringPrintf("not a zip archive (found 0x%x)", header);
- return false;
- }
-
- // Perform the traditional EOCD snipe hunt.
- //
- // We're searching for the End of Central Directory magic number,
- // which appears at the start of the EOCD block. It's followed by
- // 18 bytes of EOCD stuff and up to 64KB of archive comment. We
- // need to read the last part of the file into a buffer, dig through
- // it to find the magic number, parse some values out, and use those
- // to determine the extent of the CD.
- //
- // We start by pulling in the last part of the file.
- off64_t search_start = file_length - read_amount;
-
- if (lseek64(fd_, search_start, SEEK_SET) != search_start) {
- *error_msg = ErrorStringPrintf("seek %lld failed: %s", search_start, strerror(errno));
- return false;
- }
- actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount));
- if (actual != static_cast<ssize_t>(read_amount)) {
- *error_msg = ErrorStringPrintf("read %lld, expected %zd. %s", search_start, read_amount,
- strerror(errno));
- return false;
- }
-
-
- // Scan backward for the EOCD magic. In an archive without a trailing
- // comment, we'll find it on the first try. (We may want to consider
- // doing an initial minimal read; if we don't find it, retry with a
- // second read as above.)
- int i;
- for (i = read_amount - kEOCDLen; i >= 0; i--) {
- if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) {
- break;
- }
- }
- if (i < 0) {
- *error_msg = ErrorStringPrintf("EOCD not found, not a zip file");
- return false;
- }
-
- off64_t eocd_offset = search_start + i;
- const byte* eocd_ptr = scan_buf.get() + i;
-
- CHECK(eocd_offset < file_length);
-
- // Grab the CD offset and size, and the number of entries in the
- // archive. Verify that they look reasonable.
- uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber);
- uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD);
- uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries);
- uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries);
- uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize);
- uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset);
- uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize);
-
- if ((uint64_t) dir_offset + (uint64_t) dir_size > (uint64_t) eocd_offset) {
- *error_msg = ErrorStringPrintf("bad offsets (dir=%ud, size=%ud, eocd=%lld)",
- dir_offset, dir_size, eocd_offset);
- return false;
- }
- if (num_entries == 0) {
- *error_msg = ErrorStringPrintf("empty archive?");
- return false;
- } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) {
- *error_msg = ErrorStringPrintf("spanned archives not supported");
- return false;
- }
-
- // Check to see if comment is a sane size
- if ((comment_size > (file_length - kEOCDLen))
- || (eocd_offset > (file_length - kEOCDLen) - comment_size)) {
- *error_msg = ErrorStringPrintf("comment size runs off end of file");
- return false;
- }
-
- // It all looks good. Create a mapping for the CD.
- dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset,
- filename_.c_str(), error_msg));
- if (dir_map_.get() == NULL) {
- return false;
- }
-
- num_entries_ = num_entries;
- dir_offset_ = dir_offset;
- return true;
-}
-
-bool ZipArchive::Parse(std::string* error_msg) {
- const byte* cd_ptr = dir_map_->Begin();
- size_t cd_length = dir_map_->Size();
-
- // Walk through the central directory, adding entries to the hash
- // table and verifying values.
- const byte* ptr = cd_ptr;
- for (int i = 0; i < num_entries_; i++) {
- if (Le32ToHost(ptr) != kCDESignature) {
- *error_msg = ErrorStringPrintf("missed a central dir sig (at %d)", i);
- return false;
- }
- if (ptr + kCDELen > cd_ptr + cd_length) {
- *error_msg = ErrorStringPrintf("ran off the end (at %d)", i);
- return false;
- }
-
- int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset);
- if (local_hdr_offset >= dir_offset_) {
- *error_msg = ErrorStringPrintf("bad LFH offset %lld at entry %d", local_hdr_offset, i);
- return false;
- }
-
- uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags);
- if ((gpbf & kGPFUnsupportedMask) != 0) {
- *error_msg = ErrorStringPrintf("invalid general purpose bit flag %x", gpbf);
- return false;
- }
-
- uint16_t name_len = Le16ToHost(ptr + kCDENameLen);
- uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen);
- uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen);
-
- // add the CDE filename to the hash table
- const char* name = reinterpret_cast<const char*>(ptr + kCDELen);
-
- // Check name for NULL characters
- if (memchr(name, 0, name_len) != NULL) {
- *error_msg = ErrorStringPrintf("filename contains NUL byte");
- return false;
- }
-
- dir_entries_.Put(StringPiece(name, name_len), ptr);
- ptr += kCDELen + name_len + extra_len + comment_len;
- if (ptr > cd_ptr + cd_length) {
- *error_msg = ErrorStringPrintf("bad CD advance (%p vs %p) at entry %d",
- ptr, cd_ptr + cd_length, i);
- return false;
- }
- }
- return true;
+ return new ZipEntry(handle_, zip_entry.release());
}
} // namespace art