author Adam Lesinski <adamlesinski@google.com> 2016-08-02 10:15:33 -0700
committer Adam Lesinski <adamlesinski@google.com> 2016-08-02 10:15:33 -0700
commit 98bda74eb58c2e0ee1e556f69dfb4baecb3f4498 (patch)
tree 8bf7f59cb970a55ac81895469cb03c2ea8449788 /tools/aapt2/remove-duplicates.py
parent 37f0905daf1f800a747af10f4322c2e63f53dc61 (diff)
Move utility scripts to tools/
Change-Id: Iddb755f3cec0a32801128b4eb20f2c185b0cc626
Diffstat (limited to 'tools/aapt2/remove-duplicates.py')
-rw-r--r--  tools/aapt2/remove-duplicates.py  181
1 file changed, 0 insertions, 181 deletions
diff --git a/tools/aapt2/remove-duplicates.py b/tools/aapt2/remove-duplicates.py
deleted file mode 100644
index fb98bb73e9a4..000000000000
--- a/tools/aapt2/remove-duplicates.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import os.path
-import sys
-import tempfile
-import xml.parsers.expat
-
-"""
-Scans each resource file in res/values/ looking for duplicates.
-All but the last occurrence of resource definition are removed.
-This creates no semantic changes, the resulting APK when built
-should contain the same definition.
-"""
-
-class Duplicate:
- """A small struct to maintain the positions of a Duplicate resource definition."""
- def __init__(self, name, product, depth, start, end):
- self.name = name
- self.product = product
- self.depth = depth
- self.start = start
- self.end = end
-
-class ResourceDefinitionLocator:
- """Callback class for xml.parsers.expat which records resource definitions and their
- locations.
- """
- def __init__(self, parser):
- self.resource_definitions = {}
- self._parser = parser
- self._depth = 0
- self._current_resource = None
-
- def start_element(self, tag_name, attrs):
- self._depth += 1
- if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]:
- resource_name = None
- product = ""
- try:
- product = attrs["product"]
- except KeyError:
- pass
-
- if tag_name == "item":
- resource_name = "{0}/{1}".format(attrs["type"], attrs["name"])
- else:
- resource_name = "{0}/{1}".format(tag_name, attrs["name"])
- self._current_resource = Duplicate(
- resource_name,
- product,
- self._depth,
- (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber),
- None)
-
- def end_element(self, tag_name):
- if self._current_resource and self._depth == self._current_resource.depth:
- # Record the end position of the element, which is the length of the name
- # plus the </> symbols (len("</>") == 3).
- self._current_resource.end = (self._parser.CurrentLineNumber - 1,
- self._parser.CurrentColumnNumber + 3 + len(tag_name))
- key_name = "{0}:{1}".format(self._current_resource.name,
- self._current_resource.product)
- try:
- self.resource_definitions[key_name] += [self._current_resource]
- except KeyError:
- self.resource_definitions[key_name] = [self._current_resource]
- self._current_resource = None
- self._depth -= 1
-
-def remove_duplicates(xml_path):
- """Reads the input file and generates an output file with any duplicate
- resources removed, keeping the last occurring definition and removing
- the others. The output is written to a temporary and then renamed
- to the original file name.
- """
- input = ""
- with open(xml_path) as fin:
- input = fin.read()
-
- parser = xml.parsers.expat.ParserCreate("utf-8")
- parser.returns_unicode = True
- tracker = ResourceDefinitionLocator(parser)
- parser.StartElementHandler = tracker.start_element
- parser.EndElementHandler = tracker.end_element
- parser.Parse(input)
-
- # Treat the input as UTF-8 or else column numbers will be wrong.
- input_lines = input.decode('utf-8').splitlines(True)
-
- # Extract the duplicate resource definitions, ignoring the last definition
- # which will take precedence and be left intact.
- duplicates = []
- for res_name, entries in tracker.resource_definitions.iteritems():
- if len(entries) > 1:
- duplicates += entries[:-1]
-
- # Sort the duplicates so that they are in order. That way we only do one pass.
- duplicates = sorted(duplicates, key=lambda x: x.start)
-
- last_line_no = 0
- last_col_no = 0
- output_lines = []
- current_line = ""
- for definition in duplicates:
- print "{0}:{1}:{2}: removing duplicate resource '{3}'".format(
- xml_path, definition.start[0] + 1, definition.start[1], definition.name)
-
- if last_line_no < definition.start[0]:
- # The next definition is on a new line, so write what we have
- # to the output.
- new_line = current_line + input_lines[last_line_no][last_col_no:]
- if not new_line.isspace():
- output_lines.append(new_line)
- current_line = ""
- last_col_no = 0
- last_line_no += 1
-
- # Copy all the lines up until this one.
- for line_to_copy in xrange(last_line_no, definition.start[0]):
- output_lines.append(input_lines[line_to_copy])
-
- # Add to the existing line we're building, by including the prefix of this line
- # and skipping the lines and characters until the end of this duplicate definition.
- last_line_no = definition.start[0]
- current_line += input_lines[last_line_no][last_col_no:definition.start[1]]
- last_line_no = definition.end[0]
- last_col_no = definition.end[1]
-
- new_line = current_line + input_lines[last_line_no][last_col_no:]
- if not new_line.isspace():
- output_lines.append(new_line)
- current_line = ""
- last_line_no += 1
- last_col_no = 0
-
- for line_to_copy in xrange(last_line_no, len(input_lines)):
- output_lines.append(input_lines[line_to_copy])
-
- if len(duplicates) > 0:
- print "{0}: writing deduped copy...".format(xml_path)
-
- # Write the lines to a temporary file.
- dirname, basename = os.path.split(xml_path)
- temp_name = ""
- with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp:
- temp_name = temp.name
- for line in output_lines:
- temp.write(line.encode('utf-8'))
-
- # Now rename that file to the original so we have an atomic write that is consistent.
- os.rename(temp.name, xml_path)
-
-def enumerate_files(res_path):
- """Enumerates all files in the resource directory that are XML files and
- within a values-* subdirectory. These types of files end up compiled
- in the resources.arsc table of an APK.
- """
- values_directories = os.listdir(res_path)
- values_directories = filter(lambda f: f.startswith('values'), values_directories)
- values_directories = map(lambda f: os.path.join(res_path, f), values_directories)
- all_files = []
- for dir in values_directories:
- files = os.listdir(dir)
- files = filter(lambda f: f.endswith('.xml'), files)
- files = map(lambda f: os.path.join(dir, f), files)
- all_files += files
- return all_files
-
-if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        print >> sys.stderr, "please specify a path to a resource directory"
-        sys.exit(1)
-
-    res_path = os.path.abspath(sys.argv[1])
-    print "looking in {0} ...".format(res_path)
-
-    for f in enumerate_files(res_path):
-        print "checking {0} ...".format(f)
-        remove_duplicates(f)
-
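
For context on the deleted script: it walks a res/ directory, parses each values-*/ XML file with xml.parsers.expat, records the (line, column) span of every second-level resource element via the StartElementHandler/EndElementHandler callbacks, and then rewrites the file with all but the last definition of each name/product pair removed, using NamedTemporaryFile plus os.rename so the replacement is consistent. The sketch below illustrates only the expat callback pattern in isolation; it is not part of the change above, targets Python 3, and the sample XML and tag names are made up for demonstration.

#!/usr/bin/env python3
# Minimal sketch, not part of the commit above: the expat callback pattern that
# ResourceDefinitionLocator builds on, shown in Python 3 with made-up sample XML.
import xml.parsers.expat

SAMPLE = b"""<resources>
    <string name="app_name">One</string>
    <string name="app_name">Two</string>
</resources>"""

positions = []
parser = xml.parsers.expat.ParserCreate("utf-8")

def start_element(tag_name, attrs):
    # Expat reports 1-based lines and 0-based columns; the deleted script stores
    # (line - 1, column) pairs so it can index straight into a list of source lines.
    if tag_name == "string":
        positions.append((attrs["name"],
                          parser.CurrentLineNumber - 1,
                          parser.CurrentColumnNumber))

parser.StartElementHandler = start_element
parser.Parse(SAMPLE, True)

for name, line, col in positions:
    print("{0} defined at line index {1}, column {2}".format(name, line, col))

The script itself was Python 2 and, per its __main__ block, was invoked with a single argument: the path to a resource directory whose values-*/ subdirectories it then rewrote in place.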