libziparchive: Use ReadAtOffset exclusively

The use of ReadAtOffset is meant to allow concurrent access
to the zip archive once it has been loaded. There were places
where this was the case, and some places that did a seek + read
combination, which could lead to data races.

NOTE: On Windows, we are not using pread as the implementation of
ReadAtOffset, therefore the guarantees on Windows are weaker.

On Linux, pread allows the file descriptor to be read at a specific
offset without changing the read pointer. This allows inherited fd's
and duped fds to be read concurrently.

On Windows, we use the ReadFile API, which allows for an atomic seek +
read operation, but modifies the read pointer. This means that any mix
use of ReadAtOffset and Read will have races. Just using ReadAtOffset is
safe.

For the Windows case, this is fine as the libziparchive code now only
uses ReadAtOffset.

Bug: 62184114
Bug: 62101783
Test: make ziparchive-tests (existing tests pass)
Change-Id: Ia7f9a30af2216682cdd9d578d26e84bc46773bb9
diff --git a/base/file.cpp b/base/file.cpp
index a2f2887..2f697a1 100644
--- a/base/file.cpp
+++ b/base/file.cpp
@@ -153,6 +153,37 @@
   return true;
 }
 
+#if defined(_WIN32)
+// Windows implementation of pread. Note that this DOES move the file descriptors read position,
+// but it does so atomically.
+static ssize_t pread(int fd, void* data, size_t byte_count, off64_t offset) {
+  DWORD bytes_read;
+  OVERLAPPED overlapped;
+  memset(&overlapped, 0, sizeof(OVERLAPPED));
+  overlapped.Offset = static_cast<DWORD>(offset);
+  overlapped.OffsetHigh = static_cast<DWORD>(offset >> 32);
+  if (!ReadFile(reinterpret_cast<HANDLE>(_get_osfhandle(fd)), data, static_cast<DWORD>(byte_count),
+                &bytes_read, &overlapped)) {
+    // In case someone tries to read errno (since this is masquerading as a POSIX call)
+    errno = EIO;
+    return -1;
+  }
+  return static_cast<ssize_t>(bytes_read);
+}
+#endif
+
+bool ReadFullyAtOffset(int fd, void* data, size_t byte_count, off64_t offset) {
+  uint8_t* p = reinterpret_cast<uint8_t*>(data);
+  while (byte_count > 0) {
+    ssize_t n = TEMP_FAILURE_RETRY(pread(fd, p, byte_count, offset));
+    if (n <= 0) return false;
+    p += n;
+    byte_count -= n;
+    offset += n;
+  }
+  return true;
+}
+
 bool WriteFully(int fd, const void* data, size_t byte_count) {
   const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
   size_t remaining = byte_count;
diff --git a/base/include/android-base/file.h b/base/include/android-base/file.h
index 651f529..af14892 100644
--- a/base/include/android-base/file.h
+++ b/base/include/android-base/file.h
@@ -42,6 +42,17 @@
 #endif
 
 bool ReadFully(int fd, void* data, size_t byte_count);
+
+// Reads `byte_count` bytes from the file descriptor at the specified offset.
+// Returns false if there was an IO error or EOF was reached before reading `byte_count` bytes.
+//
+// NOTE: On Linux/Mac, this function wraps pread, which provides atomic read support without
+// modifying the read pointer of the file descriptor. On Windows, however, the read pointer does
+// get modified. This means that ReadFullyAtOffset can be used concurrently with other calls to the
+// same function, but concurrently seeking or reading incrementally can lead to unexpected
+// behavior.
+bool ReadFullyAtOffset(int fd, void* data, size_t byte_count, off64_t offset);
+
 bool WriteFully(int fd, const void* data, size_t byte_count);
 
 bool RemoveFileIfExists(const std::string& path, std::string* err = nullptr);
diff --git a/libziparchive/include/ziparchive/zip_archive_stream_entry.h b/libziparchive/include/ziparchive/zip_archive_stream_entry.h
index a40b799..b4766f8 100644
--- a/libziparchive/include/ziparchive/zip_archive_stream_entry.h
+++ b/libziparchive/include/ziparchive/zip_archive_stream_entry.h
@@ -40,7 +40,8 @@
 
   ZipArchiveHandle handle_;
 
-  uint32_t crc32_;
+  off64_t offset_ = 0;
+  uint32_t crc32_ = 0u;
 };
 
 #endif  // LIBZIPARCHIVE_ZIPARCHIVESTREAMENTRY_H_
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 17c268b..ad40d42 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -435,13 +435,20 @@
 
 static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
   uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
-  if (!mapped_zip.ReadData(ddBuf, sizeof(ddBuf))) {
+  off64_t offset = entry->offset;
+  if (entry->method != kCompressStored) {
+    offset += entry->compressed_length;
+  } else {
+    offset += entry->uncompressed_length;
+  }
+
+  if (!mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset)) {
     return kIoError;
   }
 
   const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
-  const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
-  const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
+  const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
+  const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
 
   // Validate that the values in the data descriptor match those in the central
   // directory.
@@ -899,7 +906,9 @@
     /* read as much as we can */
     if (zstream.avail_in == 0) {
       const size_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
-      if (!mapped_zip.ReadData(read_buf.data(), getSize)) {
+      off64_t offset = entry->offset + (entry->compressed_length - compressed_length);
+      // Make sure to read at offset to ensure concurrent access to the fd.
+      if (!mapped_zip.ReadAtOffset(read_buf.data(), getSize, offset)) {
         ALOGW("Zip: inflate read failed, getSize = %zu: %s", getSize, strerror(errno));
         return kIoError;
       }
@@ -962,12 +971,15 @@
   uint64_t crc = 0;
   while (count < length) {
     uint32_t remaining = length - count;
+    off64_t offset = entry->offset + count;
 
-    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
-    // value.
+    // Safe conversion because kBufSize is narrow enough for a 32 bit signed value.
     const size_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
-    if (!mapped_zip.ReadData(buf.data(), block_size)) {
-      ALOGW("CopyFileToFile: copy read failed, block_size = %zu: %s", block_size, strerror(errno));
+
+    // Make sure to read at offset to ensure concurrent access to the fd.
+    if (!mapped_zip.ReadAtOffset(buf.data(), block_size, offset)) {
+      ALOGW("CopyFileToFile: copy read failed, block_size = %zu, offset = %" PRId64 ": %s",
+            block_size, static_cast<int64_t>(offset), strerror(errno));
       return kIoError;
     }
 
@@ -986,12 +998,6 @@
 int32_t ExtractToWriter(ZipArchiveHandle handle, ZipEntry* entry, Writer* writer) {
   ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
   const uint16_t method = entry->method;
-  off64_t data_offset = entry->offset;
-
-  if (!archive->mapped_zip.SeekToOffset(data_offset)) {
-    ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
-    return kIoError;
-  }
 
   // this should default to kUnknownCompressionMethod.
   int32_t return_value = -1;
@@ -1111,52 +1117,21 @@
   }
 }
 
-bool MappedZipFile::SeekToOffset(off64_t offset) {
-  if (has_fd_) {
-    if (lseek64(fd_, offset, SEEK_SET) != offset) {
-      ALOGE("Zip: lseek to %" PRId64 " failed: %s\n", offset, strerror(errno));
-      return false;
-    }
-    return true;
-  } else {
-    if (offset < 0 || offset > static_cast<off64_t>(data_length_)) {
-      ALOGE("Zip: invalid offset: %" PRId64 ", data length: %" PRId64 "\n", offset, data_length_);
-      return false;
-    }
-
-    read_pos_ = offset;
-    return true;
-  }
-}
-
-bool MappedZipFile::ReadData(uint8_t* buffer, size_t read_amount) {
-  if (has_fd_) {
-    if (!android::base::ReadFully(fd_, buffer, read_amount)) {
-      ALOGE("Zip: read from %d failed\n", fd_);
-      return false;
-    }
-  } else {
-    memcpy(buffer, static_cast<uint8_t*>(base_ptr_) + read_pos_, read_amount);
-    read_pos_ += read_amount;
-  }
-  return true;
-}
-
 // Attempts to read |len| bytes into |buf| at offset |off|.
 bool MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) {
-#if !defined(_WIN32)
   if (has_fd_) {
-    if (static_cast<size_t>(TEMP_FAILURE_RETRY(pread64(fd_, buf, len, off))) != len) {
+    if (!android::base::ReadFullyAtOffset(fd_, buf, len, off)) {
       ALOGE("Zip: failed to read at offset %" PRId64 "\n", off);
       return false;
     }
-    return true;
+  } else {
+    if (off < 0 || off > static_cast<off64_t>(data_length_)) {
+      ALOGE("Zip: invalid offset: %" PRId64 ", data length: %" PRId64 "\n", off, data_length_);
+      return false;
+    }
+    memcpy(buf, static_cast<uint8_t*>(base_ptr_) + off, len);
   }
-#endif
-  if (!SeekToOffset(off)) {
-    return false;
-  }
-  return ReadData(buf, len);
+  return true;
 }
 
 void CentralDirectory::Initialize(void* map_base_ptr, off64_t cd_start_offset, size_t cd_size) {
diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h
index 840f1af..174aa3f 100644
--- a/libziparchive/zip_archive_private.h
+++ b/libziparchive/zip_archive_private.h
@@ -93,14 +93,10 @@
 class MappedZipFile {
  public:
   explicit MappedZipFile(const int fd)
-      : has_fd_(true), fd_(fd), base_ptr_(nullptr), data_length_(0), read_pos_(0) {}
+      : has_fd_(true), fd_(fd), base_ptr_(nullptr), data_length_(0) {}
 
   explicit MappedZipFile(void* address, size_t length)
-      : has_fd_(false),
-        fd_(-1),
-        base_ptr_(address),
-        data_length_(static_cast<off64_t>(length)),
-        read_pos_(0) {}
+      : has_fd_(false), fd_(-1), base_ptr_(address), data_length_(static_cast<off64_t>(length)) {}
 
   bool HasFd() const { return has_fd_; }
 
@@ -110,10 +106,6 @@
 
   off64_t GetFileLength() const;
 
-  bool SeekToOffset(off64_t offset);
-
-  bool ReadData(uint8_t* buffer, size_t read_amount);
-
   bool ReadAtOffset(uint8_t* buf, size_t len, off64_t off);
 
  private:
@@ -127,8 +119,6 @@
 
   void* const base_ptr_;
   const off64_t data_length_;
-  // read_pos_ is the offset to the base_ptr_ where we read data from.
-  size_t read_pos_;
 };
 
 class CentralDirectory {
diff --git a/libziparchive/zip_archive_stream_entry.cc b/libziparchive/zip_archive_stream_entry.cc
index 50352ef..9ec89b1 100644
--- a/libziparchive/zip_archive_stream_entry.cc
+++ b/libziparchive/zip_archive_stream_entry.cc
@@ -38,13 +38,8 @@
 static constexpr size_t kBufSize = 65535;
 
 bool ZipArchiveStreamEntry::Init(const ZipEntry& entry) {
-  ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
-  off64_t data_offset = entry.offset;
-  if (!archive->mapped_zip.SeekToOffset(data_offset)) {
-    ALOGW("lseek to data at %" PRId64 " failed: %s", data_offset, strerror(errno));
-    return false;
-  }
   crc32_ = entry.crc32;
+  offset_ = entry.offset;
   return true;
 }
 
@@ -61,11 +56,11 @@
  protected:
   bool Init(const ZipEntry& entry) override;
 
-  uint32_t length_;
+  uint32_t length_ = 0u;
 
  private:
   std::vector<uint8_t> data_;
-  uint32_t computed_crc32_;
+  uint32_t computed_crc32_ = 0u;
 };
 
 bool ZipArchiveStreamEntryUncompressed::Init(const ZipEntry& entry) {
@@ -89,7 +84,7 @@
   size_t bytes = (length_ > data_.size()) ? data_.size() : length_;
   ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
   errno = 0;
-  if (!archive->mapped_zip.ReadData(data_.data(), bytes)) {
+  if (!archive->mapped_zip.ReadAtOffset(data_.data(), bytes, offset_)) {
     if (errno != 0) {
       ALOGE("Error reading from archive fd: %s", strerror(errno));
     } else {
@@ -104,6 +99,7 @@
   }
   computed_crc32_ = crc32(computed_crc32_, data_.data(), data_.size());
   length_ -= bytes;
+  offset_ += bytes;
   return &data_;
 }
 
@@ -129,9 +125,9 @@
   z_stream z_stream_;
   std::vector<uint8_t> in_;
   std::vector<uint8_t> out_;
-  uint32_t uncompressed_length_;
-  uint32_t compressed_length_;
-  uint32_t computed_crc32_;
+  uint32_t uncompressed_length_ = 0u;
+  uint32_t compressed_length_ = 0u;
+  uint32_t computed_crc32_ = 0u;
 };
 
 // This method is using libz macros with old-style-casts
@@ -210,7 +206,7 @@
       size_t bytes = (compressed_length_ > in_.size()) ? in_.size() : compressed_length_;
       ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
       errno = 0;
-      if (!archive->mapped_zip.ReadData(in_.data(), bytes)) {
+      if (!archive->mapped_zip.ReadAtOffset(in_.data(), bytes, offset_)) {
         if (errno != 0) {
           ALOGE("Error reading from archive fd: %s", strerror(errno));
         } else {
@@ -220,6 +216,7 @@
       }
 
       compressed_length_ -= bytes;
+      offset_ += bytes;
       z_stream_.next_in = in_.data();
       z_stream_.avail_in = bytes;
     }