diff options
| -rw-r--r-- | cmd/merge_zips/Android.bp | 1 | ||||
| -rw-r--r-- | cmd/merge_zips/merge_zips.go | 906 | ||||
| -rw-r--r-- | cmd/merge_zips/merge_zips_test.go | 100 |
3 files changed, 680 insertions, 327 deletions
diff --git a/cmd/merge_zips/Android.bp b/cmd/merge_zips/Android.bp index ab658fd0d..f70c86eb6 100644 --- a/cmd/merge_zips/Android.bp +++ b/cmd/merge_zips/Android.bp @@ -18,6 +18,7 @@ blueprint_go_binary { "android-archive-zip", "blueprint-pathtools", "soong-jar", + "soong-zip", ], srcs: [ "merge_zips.go", diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go index 68fe2592d..27179cb28 100644 --- a/cmd/merge_zips/merge_zips.go +++ b/cmd/merge_zips/merge_zips.go @@ -30,457 +30,723 @@ import ( "android/soong/jar" "android/soong/third_party/zip" + soongZip "android/soong/zip" ) -type fileList []string +// Input zip: we can open it, close it, and obtain an array of entries +type InputZip interface { + Name() string + Open() error + Close() error + Entries() []*zip.File + IsOpen() bool +} -func (f *fileList) String() string { - return `""` +// An entry that can be written to the output zip +type ZipEntryContents interface { + String() string + IsDir() bool + CRC32() uint32 + Size() uint64 + WriteToZip(dest string, zw *zip.Writer) error } -func (f *fileList) Set(name string) error { - *f = append(*f, filepath.Clean(name)) +// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip +// identified by the input zip and the index of the entry in its entries array +type ZipEntryFromZip struct { + inputZip InputZip + index int + name string + isDir bool + crc32 uint32 + size uint64 +} - return nil +func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip { + fi := inputZip.Entries()[entryIndex] + newEntry := ZipEntryFromZip{inputZip: inputZip, + index: entryIndex, + name: fi.Name, + isDir: fi.FileInfo().IsDir(), + crc32: fi.CRC32, + size: fi.UncompressedSize64, + } + return &newEntry } -type zipsToNotStripSet map[string]bool +func (ze ZipEntryFromZip) String() string { + return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name) +} -func (s zipsToNotStripSet) String() string { - return `""` +func (ze ZipEntryFromZip) IsDir() bool { + return ze.isDir } -func (s zipsToNotStripSet) Set(zip_path string) error { - s[zip_path] = true +func (ze ZipEntryFromZip) CRC32() uint32 { + return ze.crc32 +} - return nil +func (ze ZipEntryFromZip) Size() uint64 { + return ze.size } -var ( - sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)") - emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)") - emulatePar = flag.Bool("p", false, "merge zip entries based on par format") - stripDirs fileList - stripFiles fileList - zipsToNotStrip = make(zipsToNotStripSet) - stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file") - manifest = flag.String("m", "", "manifest file to insert in jar") - pyMain = flag.String("pm", "", "__main__.py file to insert in par") - prefix = flag.String("prefix", "", "A file to prefix to the zip file") - ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn") -) +func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error { + if err := ze.inputZip.Open(); err != nil { + return err + } + return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest) +} -func init() { - flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards") - flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards") - flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping") +// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte +type ZipEntryFromBuffer struct { + fh *zip.FileHeader + content []byte } -func main() { - flag.Usage = func() { - fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]") - flag.PrintDefaults() - } +func (be ZipEntryFromBuffer) String() string { + return "internal buffer" +} - // parse args - flag.Parse() - args := flag.Args() - if len(args) < 1 { - flag.Usage() - os.Exit(1) - } - outputPath := args[0] - inputs := args[1:] +func (be ZipEntryFromBuffer) IsDir() bool { + return be.fh.FileInfo().IsDir() +} - log.SetFlags(log.Lshortfile) +func (be ZipEntryFromBuffer) CRC32() uint32 { + return crc32.ChecksumIEEE(be.content) +} - // make writer - output, err := os.Create(outputPath) +func (be ZipEntryFromBuffer) Size() uint64 { + return uint64(len(be.content)) +} + +func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error { + w, err := zw.CreateHeader(be.fh) if err != nil { - log.Fatal(err) + return err } - defer output.Close() - var offset int64 - if *prefix != "" { - prefixFile, err := os.Open(*prefix) - if err != nil { - log.Fatal(err) - } - offset, err = io.Copy(output, prefixFile) + if !be.IsDir() { + _, err = w.Write(be.content) if err != nil { - log.Fatal(err) + return err } } - writer := zip.NewWriter(output) - defer func() { - err := writer.Close() - if err != nil { - log.Fatal(err) - } - }() - writer.SetOffset(offset) + return nil +} - // make readers - readers := []namedZipReader{} - for _, input := range inputs { - reader, err := zip.OpenReader(input) - if err != nil { - log.Fatal(err) - } - defer reader.Close() - namedReader := namedZipReader{path: input, reader: &reader.Reader} - readers = append(readers, namedReader) - } +// Processing state. +type OutputZip struct { + outputWriter *zip.Writer + stripDirEntries bool + emulateJar bool + sortEntries bool + ignoreDuplicates bool + excludeDirs []string + excludeFiles []string + sourceByDest map[string]ZipEntryContents +} - if *manifest != "" && !*emulateJar { - log.Fatal(errors.New("must specify -j when specifying a manifest via -m")) +func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip { + return &OutputZip{ + outputWriter: outputWriter, + stripDirEntries: stripDirEntries, + emulateJar: emulateJar, + sortEntries: sortEntries, + sourceByDest: make(map[string]ZipEntryContents, 0), + ignoreDuplicates: ignoreDuplicates, } +} - if *pyMain != "" && !*emulatePar { - log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm")) +func (oz *OutputZip) setExcludeDirs(excludeDirs []string) { + oz.excludeDirs = make([]string, len(excludeDirs)) + for i, dir := range excludeDirs { + oz.excludeDirs[i] = filepath.Clean(dir) } +} - // do merge - err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar, - *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip)) - if err != nil { - log.Fatal(err) +func (oz *OutputZip) setExcludeFiles(excludeFiles []string) { + oz.excludeFiles = excludeFiles +} + +// Adds an entry with given name whose source is given ZipEntryContents. Returns old ZipEntryContents +// if entry with given name already exists. +func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) { + if existingSource, exists := oz.sourceByDest[name]; exists { + return existingSource, nil + } + oz.sourceByDest[name] = source + // Delay writing an entry if entries need to be rearranged. + if oz.emulateJar || oz.sortEntries { + return nil, nil } + return nil, source.WriteToZip(name, oz.outputWriter) } -// a namedZipReader reads a .zip file and can say which file it's reading -type namedZipReader struct { - path string - reader *zip.Reader +// Adds an entry for the manifest (META-INF/MANIFEST.MF from the given file +func (oz *OutputZip) addManifest(manifestPath string) error { + if !oz.stripDirEntries { + if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil { + return err + } + } + contents, err := ioutil.ReadFile(manifestPath) + if err == nil { + fh, buf, err := jar.ManifestFileContents(contents) + if err == nil { + _, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf}) + } + } + return err } -// a zipEntryPath refers to a file contained in a zip -type zipEntryPath struct { - zipName string - entryName string +// Adds an entry with given name and contents read from given file +func (oz *OutputZip) addZipEntryFromFile(name string, path string) error { + buf, err := ioutil.ReadFile(path) + if err == nil { + fh := &zip.FileHeader{ + Name: name, + Method: zip.Store, + UncompressedSize64: uint64(len(buf)), + } + fh.SetMode(0700) + fh.SetModTime(jar.DefaultTime) + _, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf}) + } + return err } -func (p zipEntryPath) String() string { - return p.zipName + "/" + p.entryName +func (oz *OutputZip) addEmptyEntry(entry string) error { + var emptyBuf []byte + fh := &zip.FileHeader{ + Name: entry, + Method: zip.Store, + UncompressedSize64: uint64(len(emptyBuf)), + } + fh.SetMode(0700) + fh.SetModTime(jar.DefaultTime) + _, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf}) + return err } -// a zipEntry is a zipSource that pulls its content from another zip -type zipEntry struct { - path zipEntryPath - content *zip.File +// Returns true if given entry is to be excluded +func (oz *OutputZip) isEntryExcluded(name string) bool { + for _, dir := range oz.excludeDirs { + dir = filepath.Clean(dir) + patterns := []string{ + dir + "/", // the directory itself + dir + "/**/*", // files recursively in the directory + dir + "/**/*/", // directories recursively in the directory + } + + for _, pattern := range patterns { + match, err := pathtools.Match(pattern, name) + if err != nil { + panic(fmt.Errorf("%s: %s", err.Error(), pattern)) + } + if match { + if oz.emulateJar { + // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is + // requested. + // TODO(ccross): which files does this affect? + if name != jar.MetaDir && name != jar.ManifestFile { + return true + } + } + return true + } + } + } + + for _, pattern := range oz.excludeFiles { + match, err := pathtools.Match(pattern, name) + if err != nil { + panic(fmt.Errorf("%s: %s", err.Error(), pattern)) + } + if match { + return true + } + } + return false } -func (ze zipEntry) String() string { - return ze.path.String() +// Creates a zip entry whose contents is an entry from the given input zip. +func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error { + entry := NewZipEntryFromZip(inputZip, index) + if oz.stripDirEntries && entry.IsDir() { + return nil + } + existingEntry, err := oz.addZipEntry(entry.name, entry) + if err != nil { + return err + } + if existingEntry == nil { + return nil + } + + // File types should match + if existingEntry.IsDir() != entry.IsDir() { + return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n", + entry.name, existingEntry, entry) + } + + if oz.ignoreDuplicates || + // Skip manifest and module info files that are not from the first input file + (oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) || + // Identical entries + (existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) || + // Directory entries + entry.IsDir() { + return nil + } + + return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name()) } -func (ze zipEntry) IsDir() bool { - return ze.content.FileInfo().IsDir() +func (oz *OutputZip) entriesArray() []string { + entries := make([]string, len(oz.sourceByDest)) + i := 0 + for entry := range oz.sourceByDest { + entries[i] = entry + i++ + } + return entries } -func (ze zipEntry) CRC32() uint32 { - return ze.content.FileHeader.CRC32 +func (oz *OutputZip) jarSorted() []string { + entries := oz.entriesArray() + sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) }) + return entries } -func (ze zipEntry) Size() uint64 { - return ze.content.FileHeader.UncompressedSize64 +func (oz *OutputZip) alphanumericSorted() []string { + entries := oz.entriesArray() + sort.Strings(entries) + return entries } -func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error { - return zw.CopyFrom(ze.content, dest) +func (oz *OutputZip) writeEntries(entries []string) error { + for _, entry := range entries { + source, _ := oz.sourceByDest[entry] + if err := source.WriteToZip(entry, oz.outputWriter); err != nil { + return err + } + } + return nil } -// a bufferEntry is a zipSource that pulls its content from a []byte -type bufferEntry struct { - fh *zip.FileHeader - content []byte +func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) { + // the runfiles packages needs to be populated with "__init__.py". + // the runfiles dirs have been treated as packages. + allPackages := make(map[string]bool) + initedPackages := make(map[string]bool) + getPackage := func(path string) string { + ret := filepath.Dir(path) + // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/". + if ret == "." || ret == "/" { + return "" + } + return ret + } + + // put existing __init__.py files to a set first. This set is used for preventing + // generated __init__.py files from overwriting existing ones. + for _, inputZip := range inputZips { + if err := inputZip.Open(); err != nil { + return nil, err + } + for _, file := range inputZip.Entries() { + pyPkg := getPackage(file.Name) + if filepath.Base(file.Name) == "__init__.py" { + if _, found := initedPackages[pyPkg]; found { + panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name)) + } + initedPackages[pyPkg] = true + } + for pyPkg != "" { + if _, found := allPackages[pyPkg]; found { + break + } + allPackages[pyPkg] = true + pyPkg = getPackage(pyPkg) + } + } + } + noInitPackages := make([]string, 0) + for pyPkg := range allPackages { + if _, found := initedPackages[pyPkg]; !found { + noInitPackages = append(noInitPackages, pyPkg) + } + } + return noInitPackages, nil } -func (be bufferEntry) String() string { - return "internal buffer" +// An InputZip owned by the InputZipsManager. Opened ManagedInputZip's are chained in the open order. +type ManagedInputZip struct { + owner *InputZipsManager + realInputZip InputZip + older *ManagedInputZip + newer *ManagedInputZip } -func (be bufferEntry) IsDir() bool { - return be.fh.FileInfo().IsDir() +// Maintains the array of ManagedInputZips, keeping track of open input ones. When an InputZip is opened, +// may close some other InputZip to limit the number of open ones. +type InputZipsManager struct { + inputZips []*ManagedInputZip + nOpenZips int + maxOpenZips int + openInputZips *ManagedInputZip } -func (be bufferEntry) CRC32() uint32 { - return crc32.ChecksumIEEE(be.content) +func (miz *ManagedInputZip) unlink() { + olderMiz := miz.older + newerMiz := miz.newer + if newerMiz.older != miz || olderMiz.newer != miz { + panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v", + miz, miz, newerMiz, newerMiz, olderMiz, olderMiz)) + } + olderMiz.newer = newerMiz + newerMiz.older = olderMiz + miz.newer = nil + miz.older = nil } -func (be bufferEntry) Size() uint64 { - return uint64(len(be.content)) +func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) { + if olderMiz.newer != nil || olderMiz.older != nil { + panic(fmt.Errorf("inputZip is already open")) + } + oldOlderMiz := miz.older + if oldOlderMiz.newer != miz { + panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, oldOlderMiz)) + } + miz.older = olderMiz + olderMiz.older = oldOlderMiz + oldOlderMiz.newer = olderMiz + olderMiz.newer = miz } -func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error { - w, err := zw.CreateHeader(be.fh) - if err != nil { - return err +func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager { + if maxOpenZips < 3 { + panic(fmt.Errorf("open zips limit should be above 3")) } + // In the dummy element .older points to the most recently opened InputZip, and .newer points to the oldest. + head := new(ManagedInputZip) + head.older = head + head.newer = head + return &InputZipsManager{ + inputZips: make([]*ManagedInputZip, 0, nInputZips), + maxOpenZips: maxOpenZips, + openInputZips: head, + } +} - if !be.IsDir() { - _, err = w.Write(be.content) - if err != nil { +// InputZip factory +func (izm *InputZipsManager) Manage(inz InputZip) InputZip { + iz := &ManagedInputZip{owner: izm, realInputZip: inz} + izm.inputZips = append(izm.inputZips, iz) + return iz +} + +// Opens or reopens ManagedInputZip. +func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error { + if miz.realInputZip.IsOpen() { + if miz != izm.openInputZips { + miz.unlink() + izm.openInputZips.link(miz) + } + return nil + } + if izm.nOpenZips >= izm.maxOpenZips { + if err := izm.close(izm.openInputZips.older); err != nil { return err } } + if err := miz.realInputZip.Open(); err != nil { + return err + } + izm.openInputZips.link(miz) + izm.nOpenZips++ + return nil +} +func (izm *InputZipsManager) close(miz *ManagedInputZip) error { + if miz.IsOpen() { + err := miz.realInputZip.Close() + izm.nOpenZips-- + miz.unlink() + return err + } return nil } -type zipSource interface { - String() string - IsDir() bool - CRC32() uint32 - Size() uint64 - WriteToZip(dest string, zw *zip.Writer) error +// Checks that openInputZips deque is valid +func (izm *InputZipsManager) checkOpenZipsDeque() { + nReallyOpen := 0 + el := izm.openInputZips + for { + elNext := el.older + if elNext.newer != el { + panic(fmt.Errorf("Element:\n %p: %v\nNext:\n %p %v", el, el, elNext, elNext)) + } + if elNext == izm.openInputZips { + break + } + el = elNext + if !el.IsOpen() { + panic(fmt.Errorf("Found unopened element")) + } + nReallyOpen++ + if nReallyOpen > izm.nOpenZips { + panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips)) + } + } + if nReallyOpen > izm.nOpenZips { + panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips)) + } } -// a fileMapping specifies to copy a zip entry from one place to another -type fileMapping struct { - dest string - source zipSource +func (miz *ManagedInputZip) Name() string { + return miz.realInputZip.Name() } -func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string, - sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool, - stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error { +func (miz *ManagedInputZip) Open() error { + return miz.owner.reopen(miz) +} - sourceByDest := make(map[string]zipSource, 0) - orderedMappings := []fileMapping{} +func (miz *ManagedInputZip) Close() error { + return miz.owner.close(miz) +} - // if dest already exists returns a non-null zipSource for the existing source - addMapping := func(dest string, source zipSource) zipSource { - mapKey := filepath.Clean(dest) - if existingSource, exists := sourceByDest[mapKey]; exists { - return existingSource - } +func (miz *ManagedInputZip) IsOpen() bool { + return miz.realInputZip.IsOpen() +} - sourceByDest[mapKey] = source - orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest}) - return nil +func (miz *ManagedInputZip) Entries() []*zip.File { + if !miz.IsOpen() { + panic(fmt.Errorf("%s: is not open", miz.Name())) } + return miz.realInputZip.Entries() +} - if manifest != "" { - if !stripDirEntries { - dirHeader := jar.MetaDirFileHeader() - dirSource := bufferEntry{dirHeader, nil} - addMapping(jar.MetaDir, dirSource) - } +// Actual processing. +func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string, + sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool, + excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error { - contents, err := ioutil.ReadFile(manifest) - if err != nil { + out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates) + out.setExcludeFiles(excludeFiles) + out.setExcludeDirs(excludeDirs) + if manifest != "" { + if err := out.addManifest(manifest); err != nil { return err } - - fh, buf, err := jar.ManifestFileContents(contents) - if err != nil { + } + if pyMain != "" { + if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil { return err } - - fileSource := bufferEntry{fh, buf} - addMapping(jar.ManifestFile, fileSource) } - if pyMain != "" { - buf, err := ioutil.ReadFile(pyMain) + if emulatePar { + noInitPackages, err := out.getUninitializedPythonPackages(inputZips) if err != nil { return err } - fh := &zip.FileHeader{ - Name: "__main__.py", - Method: zip.Store, - UncompressedSize64: uint64(len(buf)), + for _, uninitializedPyPackage := range noInitPackages { + if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil { + return err + } } - fh.SetMode(0700) - fh.SetModTime(jar.DefaultTime) - fileSource := bufferEntry{fh, buf} - addMapping("__main__.py", fileSource) } - if emulatePar { - // the runfiles packages needs to be populated with "__init__.py". - newPyPkgs := []string{} - // the runfiles dirs have been treated as packages. - existingPyPkgSet := make(map[string]bool) - // put existing __init__.py files to a set first. This set is used for preventing - // generated __init__.py files from overwriting existing ones. - for _, namedReader := range readers { - for _, file := range namedReader.reader.File { - if filepath.Base(file.Name) != "__init__.py" { - continue - } - pyPkg := pathBeforeLastSlash(file.Name) - if _, found := existingPyPkgSet[pyPkg]; found { - panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name)) - } else { - existingPyPkgSet[pyPkg] = true - } - } + // Finally, add entries from all the input zips. + for _, inputZip := range inputZips { + _, copyFully := zipsToNotStrip[inputZip.Name()] + if err := inputZip.Open(); err != nil { + return err } - for _, namedReader := range readers { - for _, file := range namedReader.reader.File { - var parentPath string /* the path after trimming last "/" */ - if filepath.Base(file.Name) == "__init__.py" { - // for existing __init__.py files, we should trim last "/" for twice. - // eg. a/b/c/__init__.py ---> a/b - parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name)) - } else { - parentPath = pathBeforeLastSlash(file.Name) + + for i, entry := range inputZip.Entries() { + if copyFully || !out.isEntryExcluded(entry.Name) { + if err := out.copyEntry(inputZip, i); err != nil { + return err } - populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs) } } - for _, pkg := range newPyPkgs { - var emptyBuf []byte - fh := &zip.FileHeader{ - Name: filepath.Join(pkg, "__init__.py"), - Method: zip.Store, - UncompressedSize64: uint64(len(emptyBuf)), + // Unless we need to rearrange the entries, the input zip can now be closed. + if !(emulateJar || sortEntries) { + if err := inputZip.Close(); err != nil { + return err } - fh.SetMode(0700) - fh.SetModTime(jar.DefaultTime) - fileSource := bufferEntry{fh, emptyBuf} - addMapping(filepath.Join(pkg, "__init__.py"), fileSource) } } - for _, namedReader := range readers { - _, skipStripThisZip := zipsToNotStrip[namedReader.path] - for _, file := range namedReader.reader.File { - if !skipStripThisZip { - if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil { - return err - } else if skip { - continue - } - } - if stripDirEntries && file.FileInfo().IsDir() { - continue - } + if emulateJar { + return out.writeEntries(out.jarSorted()) + } else if sortEntries { + return out.writeEntries(out.alphanumericSorted()) + } + return nil +} - // check for other files or directories destined for the same path - dest := file.Name +// Process command line +type fileList []string - // make a new entry to add - source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file} +func (f *fileList) String() string { + return `""` +} - if existingSource := addMapping(dest, source); existingSource != nil { - // handle duplicates - if existingSource.IsDir() != source.IsDir() { - return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n", - dest, existingSource, source) - } +func (f *fileList) Set(name string) error { + *f = append(*f, filepath.Clean(name)) - if ignoreDuplicates { - continue - } + return nil +} - if emulateJar && - file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass { - // Skip manifest and module info files that are not from the first input file - continue - } +type zipsToNotStripSet map[string]bool - if source.IsDir() { - continue - } +func (s zipsToNotStripSet) String() string { + return `""` +} - if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() { - continue - } +func (s zipsToNotStripSet) Set(path string) error { + s[path] = true + return nil +} - return fmt.Errorf("Duplicate path %v found in %v and %v\n", - dest, existingSource, source) - } - } - } +var ( + sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)") + emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)") + emulatePar = flag.Bool("p", false, "merge zip entries based on par format") + excludeDirs fileList + excludeFiles fileList + zipsToNotStrip = make(zipsToNotStripSet) + stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file") + manifest = flag.String("m", "", "manifest file to insert in jar") + pyMain = flag.String("pm", "", "__main__.py file to insert in par") + prefix = flag.String("prefix", "", "A file to prefix to the zip file") + ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn") +) - if emulateJar { - jarSort(orderedMappings) - } else if sortEntries { - alphanumericSort(orderedMappings) - } +func init() { + flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards") + flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards") + flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping") +} - for _, entry := range orderedMappings { - if err := entry.source.WriteToZip(entry.dest, writer); err != nil { - return err - } - } +type FileInputZip struct { + name string + reader *zip.ReadCloser +} +func (fiz *FileInputZip) Name() string { + return fiz.name +} + +func (fiz *FileInputZip) Close() error { + if fiz.IsOpen() { + reader := fiz.reader + fiz.reader = nil + return reader.Close() + } return nil } -// Sets the given directory and all its ancestor directories as Python packages. -func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) { - for pkgPath != "" { - if _, found := existingPyPkgSet[pkgPath]; !found { - existingPyPkgSet[pkgPath] = true - *newPyPkgs = append(*newPyPkgs, pkgPath) - // Gets its ancestor directory by trimming last slash. - pkgPath = pathBeforeLastSlash(pkgPath) - } else { - break - } +func (fiz *FileInputZip) Entries() []*zip.File { + if !fiz.IsOpen() { + panic(fmt.Errorf("%s: is not open", fiz.Name())) } + return fiz.reader.File +} + +func (fiz *FileInputZip) IsOpen() bool { + return fiz.reader != nil } -func pathBeforeLastSlash(path string) string { - ret := filepath.Dir(path) - // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/". - if ret == "." || ret == "/" { - return "" +func (fiz *FileInputZip) Open() error { + if fiz.IsOpen() { + return nil } - return ret + var err error + fiz.reader, err = zip.OpenReader(fiz.Name()) + return err } -func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) { - for _, dir := range stripDirs { - dir = filepath.Clean(dir) - patterns := []string{ - dir + "/", // the directory itself - dir + "/**/*", // files recursively in the directory - dir + "/**/*/", // directories recursively in the directory - } +func main() { + flag.Usage = func() { + fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]") + flag.PrintDefaults() + } - for _, pattern := range patterns { - match, err := pathtools.Match(pattern, name) + // parse args + flag.Parse() + args := flag.Args() + if len(args) < 1 { + flag.Usage() + os.Exit(1) + } + outputPath := args[0] + inputs := make([]string, 0) + for _, input := range args[1:] { + if input[0] == '@' { + bytes, err := ioutil.ReadFile(input[1:]) if err != nil { - return false, fmt.Errorf("%s: %s", err.Error(), pattern) - } else if match { - if emulateJar { - // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is - // requested. - // TODO(ccross): which files does this affect? - if name != jar.MetaDir && name != jar.ManifestFile { - return true, nil - } - } - return true, nil + log.Fatal(err) } + inputs = append(inputs, soongZip.ReadRespFile(bytes)...) + continue } + inputs = append(inputs, input) + continue } - for _, pattern := range stripFiles { - if match, err := pathtools.Match(pattern, name); err != nil { - return false, fmt.Errorf("%s: %s", err.Error(), pattern) - } else if match { - return true, nil + log.SetFlags(log.Lshortfile) + + // make writer + outputZip, err := os.Create(outputPath) + if err != nil { + log.Fatal(err) + } + defer outputZip.Close() + + var offset int64 + if *prefix != "" { + prefixFile, err := os.Open(*prefix) + if err != nil { + log.Fatal(err) + } + offset, err = io.Copy(outputZip, prefixFile) + if err != nil { + log.Fatal(err) } } - return false, nil -} -func jarSort(files []fileMapping) { - sort.SliceStable(files, func(i, j int) bool { - return jar.EntryNamesLess(files[i].dest, files[j].dest) - }) -} + writer := zip.NewWriter(outputZip) + defer func() { + err := writer.Close() + if err != nil { + log.Fatal(err) + } + }() + writer.SetOffset(offset) + + if *manifest != "" && !*emulateJar { + log.Fatal(errors.New("must specify -j when specifying a manifest via -m")) + } + + if *pyMain != "" && !*emulatePar { + log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm")) + } -func alphanumericSort(files []fileMapping) { - sort.SliceStable(files, func(i, j int) bool { - return files[i].dest < files[j].dest - }) + // do merge + inputZipsManager := NewInputZipsManager(len(inputs), 1000) + inputZips := make([]InputZip, len(inputs)) + for i, input := range inputs { + inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input}) + } + err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar, + *stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs), + map[string]bool(zipsToNotStrip)) + if err != nil { + log.Fatal(err) + } } diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go index dbde27058..cb5843607 100644 --- a/cmd/merge_zips/merge_zips_test.go +++ b/cmd/merge_zips/merge_zips_test.go @@ -51,6 +51,39 @@ var ( moduleInfoFile = testZipEntry{jar.ModuleInfoClass, 0755, []byte("module-info")} ) +type testInputZip struct { + name string + entries []testZipEntry + reader *zip.Reader +} + +func (tiz *testInputZip) Name() string { + return tiz.name +} + +func (tiz *testInputZip) Open() error { + if tiz.reader == nil { + tiz.reader = testZipEntriesToZipReader(tiz.entries) + } + return nil +} + +func (tiz *testInputZip) Close() error { + tiz.reader = nil + return nil +} + +func (tiz *testInputZip) Entries() []*zip.File { + if tiz.reader == nil { + panic(fmt.Errorf("%s: should be open to get entries", tiz.Name())) + } + return tiz.reader.File +} + +func (tiz *testInputZip) IsOpen() bool { + return tiz.reader != nil +} + func TestMergeZips(t *testing.T) { testCases := []struct { name string @@ -207,13 +240,9 @@ func TestMergeZips(t *testing.T) { for _, test := range testCases { t.Run(test.name, func(t *testing.T) { - var readers []namedZipReader + inputZips := make([]InputZip, len(test.in)) for i, in := range test.in { - r := testZipEntriesToZipReader(in) - readers = append(readers, namedZipReader{ - path: "in" + strconv.Itoa(i), - reader: r, - }) + inputZips[i] = &testInputZip{name: "in" + strconv.Itoa(i), entries: in} } want := testZipEntriesToBuf(test.out) @@ -221,7 +250,7 @@ func TestMergeZips(t *testing.T) { out := &bytes.Buffer{} writer := zip.NewWriter(out) - err := mergeZips(readers, writer, "", "", + err := mergeZips(inputZips, writer, "", "", test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates, test.stripFiles, test.stripDirs, test.zipsToNotStrip) @@ -304,3 +333,60 @@ func dumpZip(buf []byte) string { return ret } + +type DummyInpuZip struct { + isOpen bool +} + +func (diz *DummyInpuZip) Name() string { + return "dummy" +} + +func (diz *DummyInpuZip) Open() error { + diz.isOpen = true + return nil +} + +func (diz *DummyInpuZip) Close() error { + diz.isOpen = false + return nil +} + +func (DummyInpuZip) Entries() []*zip.File { + panic("implement me") +} + +func (diz *DummyInpuZip) IsOpen() bool { + return diz.isOpen +} + +func TestInputZipsManager(t *testing.T) { + const nInputZips = 20 + const nMaxOpenZips = 10 + izm := NewInputZipsManager(20, 10) + managedZips := make([]InputZip, nInputZips) + for i := 0; i < nInputZips; i++ { + managedZips[i] = izm.Manage(&DummyInpuZip{}) + } + + t.Run("InputZipsManager", func(t *testing.T) { + for i, iz := range managedZips { + if err := iz.Open(); err != nil { + t.Fatalf("Step %d: open failed: %s", i, err) + return + } + if izm.nOpenZips > nMaxOpenZips { + t.Errorf("Step %d: should be <=%d open zips", i, nMaxOpenZips) + } + } + if !managedZips[nInputZips-1].IsOpen() { + t.Error("The last input should stay open") + } + for _, iz := range managedZips { + iz.Close() + } + if izm.nOpenZips > 0 { + t.Error("Some input zips are still open") + } + }) +} |