diff options
Diffstat (limited to 'runtime/zip_archive.cc')
| -rw-r--r-- | runtime/zip_archive.cc | 524 |
1 files changed, 42 insertions, 482 deletions
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc index db273ec7bf..8cb199375b 100644 --- a/runtime/zip_archive.cc +++ b/runtime/zip_archive.cc @@ -30,272 +30,23 @@ namespace art { -static const size_t kBufSize = 32 * KB; - -// Get 2 little-endian bytes. -static uint32_t Le16ToHost(const byte* src) { - return ((src[0] << 0) | - (src[1] << 8)); -} - -// Get 4 little-endian bytes. -static uint32_t Le32ToHost(const byte* src) { - return ((src[0] << 0) | - (src[1] << 8) | - (src[2] << 16) | - (src[3] << 24)); -} - -uint16_t ZipEntry::GetCompressionMethod() { - return Le16ToHost(ptr_ + ZipArchive::kCDEMethod); -} - -uint32_t ZipEntry::GetCompressedLength() { - return Le32ToHost(ptr_ + ZipArchive::kCDECompLen); -} - uint32_t ZipEntry::GetUncompressedLength() { - return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen); + return zip_entry_->uncompressed_length; } uint32_t ZipEntry::GetCrc32() { - return Le32ToHost(ptr_ + ZipArchive::kCDECRC); + return zip_entry_->crc32; } -off64_t ZipEntry::GetDataOffset() { - // All we have is the offset to the Local File Header, which is - // variable size, so we have to read the contents of the struct to - // figure out where the actual data starts. - - // We also need to make sure that the lengths are not so large that - // somebody trying to map the compressed or uncompressed data runs - // off the end of the mapped region. - - off64_t dir_offset = zip_archive_->dir_offset_; - int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset); - if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) { - LOG(WARNING) << "Zip: bad LFH offset in zip"; - return -1; - } - - if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) { - PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset; - return -1; - } - - uint8_t lfh_buf[ZipArchive::kLFHLen]; - ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf))); - if (actual != sizeof(lfh_buf)) { - LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset; - return -1; - } - - if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) { - LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset; - return -1; - } - - uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags); - if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) { - LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf; - return -1; - } - - off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen - + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen) - + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen)); - if (data_offset >= dir_offset) { - LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip"; - return -1; - } - - // check lengths - - if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) { - LOG(WARNING) << "Zip: bad compressed length in zip " - << "(" << data_offset << " + " << GetCompressedLength() - << " > " << dir_offset << ")"; - return -1; - } - - if (GetCompressionMethod() == kCompressStored - && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) { - LOG(WARNING) << "Zip: bad uncompressed length in zip " - << "(" << data_offset << " + " << GetUncompressedLength() - << " > " << dir_offset << ")"; - return -1; - } - - return data_offset; -} - -static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) { - uint8_t* dst = begin; - std::vector<uint8_t> buf(kBufSize); - while (count != 0) { - size_t bytes_to_read = (count > kBufSize) ? kBufSize : count; - ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read)); - if (actual != static_cast<ssize_t>(bytes_to_read)) { - PLOG(WARNING) << "Zip: short read"; - return false; - } - memcpy(dst, &buf[0], bytes_to_read); - dst += bytes_to_read; - count -= bytes_to_read; - } - DCHECK_EQ(dst, begin + size); - return true; -} - -class ZStream { - public: - ZStream(byte* write_buf, size_t write_buf_size) { - // Initialize the zlib stream struct. - memset(&zstream_, 0, sizeof(zstream_)); - zstream_.zalloc = Z_NULL; - zstream_.zfree = Z_NULL; - zstream_.opaque = Z_NULL; - zstream_.next_in = NULL; - zstream_.avail_in = 0; - zstream_.next_out = reinterpret_cast<Bytef*>(write_buf); - zstream_.avail_out = write_buf_size; - zstream_.data_type = Z_UNKNOWN; - } - - z_stream& Get() { - return zstream_; - } - - ~ZStream() { - inflateEnd(&zstream_); - } - private: - z_stream zstream_; -}; - -static bool InflateToMemory(uint8_t* begin, size_t size, - int in, size_t uncompressed_length, size_t compressed_length) { - uint8_t* dst = begin; - UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]); - UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]); - if (read_buf.get() == NULL || write_buf.get() == NULL) { - LOG(WARNING) << "Zip: failed to allocate buffer to inflate"; - return false; - } - - UniquePtr<ZStream> zstream(new ZStream(write_buf.get(), kBufSize)); - - // Use the undocumented "negative window bits" feature to tell zlib - // that there's no zlib header waiting for it. - int zerr = inflateInit2(&zstream->Get(), -MAX_WBITS); - if (zerr != Z_OK) { - if (zerr == Z_VERSION_ERROR) { - LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")"; - } else { - LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")"; - } - return false; - } - - size_t remaining = compressed_length; - do { - // read as much as we can - if (zstream->Get().avail_in == 0) { - size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining; - - ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read)); - if (actual != static_cast<ssize_t>(bytes_to_read)) { - LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")"; - return false; - } - remaining -= bytes_to_read; - zstream->Get().next_in = read_buf.get(); - zstream->Get().avail_in = bytes_to_read; - } - - // uncompress the data - zerr = inflate(&zstream->Get(), Z_NO_FLUSH); - if (zerr != Z_OK && zerr != Z_STREAM_END) { - LOG(WARNING) << "Zip: inflate zerr=" << zerr - << " (next_in=" << zstream->Get().next_in - << " avail_in=" << zstream->Get().avail_in - << " next_out=" << zstream->Get().next_out - << " avail_out=" << zstream->Get().avail_out - << ")"; - return false; - } - - // write when we're full or when we're done - if (zstream->Get().avail_out == 0 || - (zerr == Z_STREAM_END && zstream->Get().avail_out != kBufSize)) { - size_t bytes_to_write = zstream->Get().next_out - write_buf.get(); - memcpy(dst, write_buf.get(), bytes_to_write); - dst += bytes_to_write; - zstream->Get().next_out = write_buf.get(); - zstream->Get().avail_out = kBufSize; - } - } while (zerr == Z_OK); - - DCHECK_EQ(zerr, Z_STREAM_END); // other errors should've been caught - - // paranoia - if (zstream->Get().total_out != uncompressed_length) { - LOG(WARNING) << "Zip: size mismatch on inflated file (" - << zstream->Get().total_out << " vs " << uncompressed_length << ")"; - return false; - } - - DCHECK_EQ(dst, begin + size); - return true; -} bool ZipEntry::ExtractToFile(File& file, std::string* error_msg) { - uint32_t length = GetUncompressedLength(); - int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length)); - if (result == -1) { - *error_msg = StringPrintf("Zip: failed to ftruncate '%s' to length %ud", file.GetPath().c_str(), - length); - return false; - } - - UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0, - file.GetPath().c_str(), error_msg)); - if (map.get() == NULL) { - *error_msg = StringPrintf("Zip: failed to mmap space for '%s': %s", file.GetPath().c_str(), - error_msg->c_str()); - return false; - } - - return ExtractToMemory(map->Begin(), map->Size(), error_msg); -} - -bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size, std::string* error_msg) { - // If size is zero, data offset will be meaningless, so bail out early. - if (size == 0) { - return true; - } - off64_t data_offset = GetDataOffset(); - if (data_offset == -1) { - *error_msg = StringPrintf("Zip: data_offset=%lld", data_offset); - return false; - } - if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) { - *error_msg = StringPrintf("Zip: lseek to data at %lld failed", data_offset); + const int32_t error = ExtractEntryToFile(handle_, zip_entry_, file.Fd()); + if (error) { + *error_msg = std::string(ErrorCodeString(error)); return false; } - // TODO: this doesn't verify the data's CRC, but probably should (especially - // for uncompressed data). - switch (GetCompressionMethod()) { - case kCompressStored: - return CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength()); - case kCompressDeflated: - return InflateToMemory(begin, size, zip_archive_->fd_, - GetUncompressedLength(), GetCompressedLength()); - default: - *error_msg = StringPrintf("Zip: unknown compression method 0x%x", GetCompressionMethod()); - return false; - } + return true; } MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename, std::string* error_msg) { @@ -303,18 +54,18 @@ MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename, std::string* error name += " extracted in memory from "; name += entry_filename; UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(), - NULL, - GetUncompressedLength(), + NULL, GetUncompressedLength(), PROT_READ | PROT_WRITE, error_msg)); if (map.get() == nullptr) { DCHECK(!error_msg->empty()); - return NULL; + return nullptr; } - bool success = ExtractToMemory(map->Begin(), map->Size(), error_msg); - if (!success) { - LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory"; - return NULL; + const int32_t error = ExtractToMemory(handle_, zip_entry_, + map->Begin(), map->Size()); + if (error) { + *error_msg = std::string(ErrorCodeString(error)); + return nullptr; } return map.release(); @@ -336,238 +87,47 @@ static void SetCloseOnExec(int fd) { ZipArchive* ZipArchive::Open(const char* filename, std::string* error_msg) { DCHECK(filename != nullptr); - int fd = open(filename, O_RDONLY, 0); - if (fd == -1) { - *error_msg = StringPrintf("Zip: unable to open '%s': %s", filename, strerror(errno)); - return NULL; - } - return OpenFromFd(fd, filename, error_msg); -} -ZipArchive* ZipArchive::OpenFromFd(int fd, const char* filename, std::string* error_msg) { - SetCloseOnExec(fd); - UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd, filename)); - CHECK(zip_archive.get() != nullptr); - if (!zip_archive->MapCentralDirectory(error_msg)) { - zip_archive->Close(); - return NULL; - } - if (!zip_archive->Parse(error_msg)) { - zip_archive->Close(); - return NULL; + ZipArchiveHandle handle; + const int32_t error = OpenArchive(filename, &handle); + if (error) { + *error_msg = std::string(ErrorCodeString(error)); + CloseArchive(handle); + return nullptr; } - return zip_archive.release(); -} - -ZipEntry* ZipArchive::Find(const char* name) const { - DCHECK(name != NULL); - DirEntries::const_iterator it = dir_entries_.find(name); - if (it == dir_entries_.end()) { - return NULL; - } - return new ZipEntry(this, (*it).second); -} -void ZipArchive::Close() { - if (fd_ != -1) { - close(fd_); - } - fd_ = -1; - num_entries_ = 0; - dir_offset_ = 0; -} - -std::string ZipArchive::ErrorStringPrintf(const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - std::string result(StringPrintf("Zip '%s' : ", filename_.c_str())); - StringAppendV(&result, fmt, ap); - va_end(ap); - return result; + SetCloseOnExec(GetFileDescriptor(handle)); + return new ZipArchive(handle); } -// Find the zip Central Directory and memory-map it. -// -// On success, returns true after populating fields from the EOCD area: -// num_entries_ -// dir_offset_ -// dir_map_ -bool ZipArchive::MapCentralDirectory(std::string* error_msg) { - /* - * Get and test file length. - */ - off64_t file_length = lseek64(fd_, 0, SEEK_END); - if (file_length < kEOCDLen) { - *error_msg = ErrorStringPrintf("length %lld is too small to be zip", file_length); - return false; - } - - size_t read_amount = kMaxEOCDSearch; - if (file_length < off64_t(read_amount)) { - read_amount = file_length; - } - - UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]); - CHECK(scan_buf.get() != nullptr); - - /* - * Make sure this is a Zip archive. - */ - if (lseek64(fd_, 0, SEEK_SET) != 0) { - *error_msg = ErrorStringPrintf("seek to start failed: %s", strerror(errno)); - return false; - } - - ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t))); - if (actual != static_cast<ssize_t>(sizeof(int32_t))) { - *error_msg = ErrorStringPrintf("couldn\'t read first signature from zip archive: %s", - strerror(errno)); - return false; - } - - unsigned int header = Le32ToHost(scan_buf.get()); - if (header != kLFHSignature) { - *error_msg = ErrorStringPrintf("not a zip archive (found 0x%x)", header); - return false; - } - - // Perform the traditional EOCD snipe hunt. - // - // We're searching for the End of Central Directory magic number, - // which appears at the start of the EOCD block. It's followed by - // 18 bytes of EOCD stuff and up to 64KB of archive comment. We - // need to read the last part of the file into a buffer, dig through - // it to find the magic number, parse some values out, and use those - // to determine the extent of the CD. - // - // We start by pulling in the last part of the file. - off64_t search_start = file_length - read_amount; - - if (lseek64(fd_, search_start, SEEK_SET) != search_start) { - *error_msg = ErrorStringPrintf("seek %lld failed: %s", search_start, strerror(errno)); - return false; - } - actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount)); - if (actual != static_cast<ssize_t>(read_amount)) { - *error_msg = ErrorStringPrintf("read %lld, expected %zd. %s", search_start, read_amount, - strerror(errno)); - return false; - } - - - // Scan backward for the EOCD magic. In an archive without a trailing - // comment, we'll find it on the first try. (We may want to consider - // doing an initial minimal read; if we don't find it, retry with a - // second read as above.) - int i; - for (i = read_amount - kEOCDLen; i >= 0; i--) { - if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) { - break; - } - } - if (i < 0) { - *error_msg = ErrorStringPrintf("EOCD not found, not a zip file"); - return false; - } - - off64_t eocd_offset = search_start + i; - const byte* eocd_ptr = scan_buf.get() + i; - - CHECK(eocd_offset < file_length); - - // Grab the CD offset and size, and the number of entries in the - // archive. Verify that they look reasonable. - uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber); - uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD); - uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries); - uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries); - uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize); - uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset); - uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize); - - if ((uint64_t) dir_offset + (uint64_t) dir_size > (uint64_t) eocd_offset) { - *error_msg = ErrorStringPrintf("bad offsets (dir=%ud, size=%ud, eocd=%lld)", - dir_offset, dir_size, eocd_offset); - return false; - } - if (num_entries == 0) { - *error_msg = ErrorStringPrintf("empty archive?"); - return false; - } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) { - *error_msg = ErrorStringPrintf("spanned archives not supported"); - return false; - } - - // Check to see if comment is a sane size - if ((comment_size > (file_length - kEOCDLen)) - || (eocd_offset > (file_length - kEOCDLen) - comment_size)) { - *error_msg = ErrorStringPrintf("comment size runs off end of file"); - return false; - } +ZipArchive* ZipArchive::OpenFromFd(int fd, const char* filename, std::string* error_msg) { + DCHECK(filename != nullptr); + DCHECK_GT(fd, 0); - // It all looks good. Create a mapping for the CD. - dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset, - filename_.c_str(), error_msg)); - if (dir_map_.get() == NULL) { - return false; + ZipArchiveHandle handle; + const int32_t error = OpenArchiveFd(fd, filename, &handle); + if (error) { + *error_msg = std::string(ErrorCodeString(error)); + CloseArchive(handle); + return nullptr; } - num_entries_ = num_entries; - dir_offset_ = dir_offset; - return true; + SetCloseOnExec(GetFileDescriptor(handle)); + return new ZipArchive(handle); } -bool ZipArchive::Parse(std::string* error_msg) { - const byte* cd_ptr = dir_map_->Begin(); - size_t cd_length = dir_map_->Size(); +ZipEntry* ZipArchive::Find(const char* name, std::string* error_msg) const { + DCHECK(name != nullptr); - // Walk through the central directory, adding entries to the hash - // table and verifying values. - const byte* ptr = cd_ptr; - for (int i = 0; i < num_entries_; i++) { - if (Le32ToHost(ptr) != kCDESignature) { - *error_msg = ErrorStringPrintf("missed a central dir sig (at %d)", i); - return false; - } - if (ptr + kCDELen > cd_ptr + cd_length) { - *error_msg = ErrorStringPrintf("ran off the end (at %d)", i); - return false; - } - - int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset); - if (local_hdr_offset >= dir_offset_) { - *error_msg = ErrorStringPrintf("bad LFH offset %lld at entry %d", local_hdr_offset, i); - return false; - } - - uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags); - if ((gpbf & kGPFUnsupportedMask) != 0) { - *error_msg = ErrorStringPrintf("invalid general purpose bit flag %x", gpbf); - return false; - } - - uint16_t name_len = Le16ToHost(ptr + kCDENameLen); - uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen); - uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen); - - // add the CDE filename to the hash table - const char* name = reinterpret_cast<const char*>(ptr + kCDELen); - - // Check name for NULL characters - if (memchr(name, 0, name_len) != NULL) { - *error_msg = ErrorStringPrintf("filename contains NUL byte"); - return false; - } - - dir_entries_.Put(StringPiece(name, name_len), ptr); - ptr += kCDELen + name_len + extra_len + comment_len; - if (ptr > cd_ptr + cd_length) { - *error_msg = ErrorStringPrintf("bad CD advance (%p vs %p) at entry %d", - ptr, cd_ptr + cd_length, i); - return false; - } + // Resist the urge to delete the space. <: is a bigraph sequence. + UniquePtr< ::ZipEntry> zip_entry(new ::ZipEntry); + const int32_t error = FindEntry(handle_, name, zip_entry.get()); + if (error) { + *error_msg = std::string(ErrorCodeString(error)); + return nullptr; } - return true; + + return new ZipEntry(handle_, zip_entry.release()); } } // namespace art |