diff options
author | 2023-01-06 20:58:01 +0800 | |
---|---|---|
committer | 2023-01-25 10:00:58 +0800 | |
commit | b8451b844882ae18dbc952ed3cb93af4e6c46cf3 (patch) | |
tree | b80f73c64dabc0a8f6f963f24e3cda4910ebf249 /zip | |
parent | 358ba4f0d8f392efd26696823d8d9e007b43d60f (diff) |
Support storing SHA256 checksums for files in soong_zip
Add a -sha256 argument to soong_zip that computes the SHA256 checksum of
each file and stores it in the extra field of that file's zip header. The
checksum can then be used by downstream systems that rely on content
addressing.
Bug: 259513199
Test: zip_test.go
Test: soong_zip -o test.zip -D test_dir -sha256
Change-Id: I20e9f424bd0a4604f0dc7cc77bd65f10eb49a163
Diffstat (limited to 'zip')
-rw-r--r-- | zip/cmd/main.go | 2 | ||||
-rw-r--r-- | zip/zip.go | 76 | ||||
-rw-r--r-- | zip/zip_test.go | 66 |
3 files changed, 118 insertions, 26 deletions
diff --git a/zip/cmd/main.go b/zip/cmd/main.go index cbc73eda6..def76aa62 100644 --- a/zip/cmd/main.go +++ b/zip/cmd/main.go @@ -163,6 +163,7 @@ func main() { parallelJobs := flags.Int("parallel", runtime.NumCPU(), "number of parallel threads to use") cpuProfile := flags.String("cpuprofile", "", "write cpu profile to file") traceFile := flags.String("trace", "", "write trace to file") + sha256Checksum := flags.Bool("sha256", false, "add a zip header to each file containing its SHA256 digest") flags.Var(&rootPrefix{}, "P", "path prefix within the zip at which to place files") flags.Var(&listFiles{}, "l", "file containing list of files to zip") @@ -224,6 +225,7 @@ func main() { WriteIfChanged: *writeIfChanged, StoreSymlinks: *symlinks, IgnoreMissingFiles: *ignoreMissingFiles, + Sha256Checksum: *sha256Checksum, }) if err != nil { fmt.Fprintln(os.Stderr, "error:", err.Error()) diff --git a/zip/zip.go b/zip/zip.go index 955fe68d0..6f1a8adaf 100644 --- a/zip/zip.go +++ b/zip/zip.go @@ -17,8 +17,11 @@ package zip import ( "bytes" "compress/flate" + "crypto/sha256" + "encoding/binary" "errors" "fmt" + "hash" "hash/crc32" "io" "io/ioutil" @@ -38,6 +41,14 @@ import ( "android/soong/third_party/zip" ) +// Sha256HeaderID is a custom Header ID for the `extra` field in +// the file header to store the SHA checksum. +const Sha256HeaderID = 0x4967 + +// Sha256HeaderSignature is the signature to verify that the extra +// data block is used to store the SHA checksum. +const Sha256HeaderSignature = 0x9514 + // Block size used during parallel compression of a single file. 
const parallelBlockSize = 1 * 1024 * 1024 // 1MB @@ -231,6 +242,8 @@ type ZipWriter struct { stderr io.Writer fs pathtools.FileSystem + + sha256Checksum bool } type zipEntry struct { @@ -257,6 +270,7 @@ type ZipArgs struct { WriteIfChanged bool StoreSymlinks bool IgnoreMissingFiles bool + Sha256Checksum bool Stderr io.Writer Filesystem pathtools.FileSystem @@ -280,6 +294,7 @@ func zipTo(args ZipArgs, w io.Writer) error { ignoreMissingFiles: args.IgnoreMissingFiles, stderr: args.Stderr, fs: args.Filesystem, + sha256Checksum: args.Sha256Checksum, } if z.fs == nil { @@ -782,15 +797,17 @@ func (z *ZipWriter) writeFileContents(header *zip.FileHeader, r pathtools.Reader // this based on actual buffer sizes in RateLimit. ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1) - // Calculate the CRC in the background, since reading the entire - // file could take a while. + // Calculate the CRC and SHA256 in the background, since reading + // the entire file could take a while. // // We could split this up into chunks as well, but it's faster // than the compression. Due to the Go Zip API, we also need to // know the result before we can begin writing the compressed // data out to the zipfile. + // + // We calculate SHA256 only if `-sha256` is set. 
wg.Add(1) - go z.crcFile(r, ze, compressChan, wg) + go z.checksumFileAsync(r, ze, compressChan, wg) for start := int64(0); start < fileSize; start += parallelBlockSize { sr := io.NewSectionReader(r, start, parallelBlockSize) @@ -829,20 +846,53 @@ func (z *ZipWriter) writeFileContents(header *zip.FileHeader, r pathtools.Reader return nil } -func (z *ZipWriter) crcFile(r io.Reader, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) { +func (z *ZipWriter) checksumFileAsync(r io.ReadSeeker, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) { defer wg.Done() defer z.cpuRateLimiter.Finish() + z.checksumFile(r, ze) + + resultChan <- ze + close(resultChan) +} + +func (z *ZipWriter) checksumFile(r io.ReadSeeker, ze *zipEntry) { crc := crc32.NewIEEE() - _, err := io.Copy(crc, r) + writers := []io.Writer{crc} + + var shaHasher hash.Hash + if z.sha256Checksum && !ze.fh.Mode().IsDir() { + shaHasher = sha256.New() + writers = append(writers, shaHasher) + } + + w := io.MultiWriter(writers...) + + _, err := io.Copy(w, r) if err != nil { z.errors <- err return } ze.fh.CRC32 = crc.Sum32() - resultChan <- ze - close(resultChan) + if shaHasher != nil { + z.appendSHAToExtra(ze, shaHasher.Sum(nil)) + } +} + +func (z *ZipWriter) appendSHAToExtra(ze *zipEntry, checksum []byte) { + // The block of SHA256 checksum consist of: + // - Header ID, equals to Sha256HeaderID (2 bytes) + // - Data size (2 bytes) + // - Data block: + // - Signature, equals to Sha256HeaderSignature (2 bytes) + // - Data, SHA checksum value + var buf []byte + buf = binary.LittleEndian.AppendUint16(buf, Sha256HeaderID) + buf = binary.LittleEndian.AppendUint16(buf, uint16(len(checksum)+2)) + buf = binary.LittleEndian.AppendUint16(buf, Sha256HeaderSignature) + buf = append(buf, checksum...) + ze.fh.Extra = append(ze.fh.Extra, buf...) 
} func (z *ZipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, resultChan chan io.Reader, wg *sync.WaitGroup) { @@ -894,17 +944,9 @@ func (z *ZipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.B } func (z *ZipWriter) compressWholeFile(ze *zipEntry, r io.ReadSeeker, compressChan chan *zipEntry) { + z.checksumFile(r, ze) - crc := crc32.NewIEEE() - _, err := io.Copy(crc, r) - if err != nil { - z.errors <- err - return - } - - ze.fh.CRC32 = crc.Sum32() - - _, err = r.Seek(0, 0) + _, err := r.Seek(0, 0) if err != nil { z.errors <- err return diff --git a/zip/zip_test.go b/zip/zip_test.go index c4832dc9a..e7fdea839 100644 --- a/zip/zip_test.go +++ b/zip/zip_test.go @@ -16,6 +16,7 @@ package zip import ( "bytes" + "encoding/hex" "hash/crc32" "io" "os" @@ -35,6 +36,10 @@ var ( fileEmpty = []byte("") fileManifest = []byte("Manifest-Version: 1.0\nCreated-By: soong_zip\n\n") + sha256FileA = "d53eda7a637c99cc7fb566d96e9fa109bf15c478410a3f5eb4d4c4e26cd081f6" + sha256FileB = "430c56c5818e62bcb6d478901ef86284e97714c138f3c86aa14fd6a84b7ce5d3" + sha256FileC = "31c5ab6111f1d6aa13c2c4e92bb3c0f7c76b61b42d141af1e846eb7f6586a51c" + fileCustomManifest = []byte("Custom manifest: true\n") customManifestAfter = []byte("Manifest-Version: 1.0\nCreated-By: soong_zip\nCustom manifest: true\n\n") ) @@ -67,6 +72,20 @@ func fh(name string, contents []byte, method uint16) zip.FileHeader { } } +func fhWithSHA256(name string, contents []byte, method uint16, sha256 string) zip.FileHeader { + h := fh(name, contents, method) + // The extra field contains 38 bytes, including 2 bytes of header ID, 2 bytes + // of size, 2 bytes of signature, and 32 bytes of checksum data block. 
+ var extra [38]byte + // The first 6 bytes contains Sha256HeaderID (0x4967), size (unit(34)) and + // Sha256HeaderSignature (0x9514) + copy(extra[0:], []byte{103, 73, 34, 0, 20, 149}) + sha256Bytes, _ := hex.DecodeString(sha256) + copy(extra[6:], sha256Bytes) + h.Extra = append(h.Extra, extra[:]...) + return h +} + func fhManifest(contents []byte) zip.FileHeader { return zip.FileHeader{ Name: "META-INF/MANIFEST.MF", @@ -87,13 +106,18 @@ func fhLink(name string, to string) zip.FileHeader { } } -func fhDir(name string) zip.FileHeader { +type fhDirOptions struct { + extra []byte +} + +func fhDir(name string, opts fhDirOptions) zip.FileHeader { return zip.FileHeader{ Name: name, Method: zip.Store, CRC32: crc32.ChecksumIEEE(nil), UncompressedSize64: 0, ExternalAttrs: (syscall.S_IFDIR|0755)<<16 | 0x10, + Extra: opts.extra, } } @@ -114,6 +138,7 @@ func TestZip(t *testing.T) { manifest string storeSymlinks bool ignoreMissingFiles bool + sha256Checksum bool files []zip.FileHeader err error @@ -320,10 +345,10 @@ func TestZip(t *testing.T) { emulateJar: true, files: []zip.FileHeader{ - fhDir("META-INF/"), + fhDir("META-INF/", fhDirOptions{extra: []byte{254, 202, 0, 0}}), fhManifest(fileManifest), - fhDir("a/"), - fhDir("a/a/"), + fhDir("a/", fhDirOptions{}), + fhDir("a/a/", fhDirOptions{}), fh("a/a/a", fileA, zip.Deflate), fh("a/a/b", fileB, zip.Deflate), }, @@ -338,10 +363,10 @@ func TestZip(t *testing.T) { manifest: "manifest.txt", files: []zip.FileHeader{ - fhDir("META-INF/"), + fhDir("META-INF/", fhDirOptions{extra: []byte{254, 202, 0, 0}}), fhManifest(customManifestAfter), - fhDir("a/"), - fhDir("a/a/"), + fhDir("a/", fhDirOptions{}), + fhDir("a/a/", fhDirOptions{}), fh("a/a/a", fileA, zip.Deflate), fh("a/a/b", fileB, zip.Deflate), }, @@ -355,8 +380,8 @@ func TestZip(t *testing.T) { dirEntries: true, files: []zip.FileHeader{ - fhDir("a/"), - fhDir("a/a/"), + fhDir("a/", fhDirOptions{}), + fhDir("a/a/", fhDirOptions{}), fh("a/a/a", fileA, zip.Deflate), fh("a/a/b", fileB, 
zip.Deflate), }, @@ -412,6 +437,23 @@ func TestZip(t *testing.T) { fh("a/a/a", fileA, zip.Deflate), }, }, + { + name: "generate SHA256 checksum", + args: fileArgsBuilder(). + File("a/a/a"). + File("a/a/b"). + File("a/a/c"). + File("c"), + compressionLevel: 9, + sha256Checksum: true, + + files: []zip.FileHeader{ + fhWithSHA256("a/a/a", fileA, zip.Deflate, sha256FileA), + fhWithSHA256("a/a/b", fileB, zip.Deflate, sha256FileB), + fhWithSHA256("a/a/c", fileC, zip.Deflate, sha256FileC), + fhWithSHA256("c", fileC, zip.Deflate, sha256FileC), + }, + }, // errors { @@ -465,6 +507,7 @@ func TestZip(t *testing.T) { args.ManifestSourcePath = test.manifest args.StoreSymlinks = test.storeSymlinks args.IgnoreMissingFiles = test.ignoreMissingFiles + args.Sha256Checksum = test.sha256Checksum args.Filesystem = mockFs args.Stderr = &bytes.Buffer{} @@ -555,6 +598,11 @@ func TestZip(t *testing.T) { t.Errorf("incorrect file %s method want %v got %v", want.Name, want.Method, got.Method) } + + if !bytes.Equal(want.Extra, got.Extra) { + t.Errorf("incorrect file %s extra want %v got %v", want.Name, + want.Extra, got.Extra) + } } }) } |