diff options

-rw-r--r--  tools/aapt2/tools/consumers/__init__.py              |   0
-rw-r--r--  tools/aapt2/tools/consumers/duplicates.py            | 132
-rw-r--r--  tools/aapt2/tools/consumers/positional_arguments.py  |  77
-rw-r--r--  tools/aapt2/tools/fix_resources.py                   |  63
-rw-r--r--  tools/aapt2/tools/remove-duplicates.py               | 181
5 files changed, 272 insertions(+), 181 deletions(-)
"""
Looks for duplicate resource definitions and removes all but the last one.
"""

import os.path
import xml.parsers.expat


class DuplicateRemover:
    """Consumer that strips all but the last definition of each resource."""

    def matches(self, file_path):
        """Return True for XML files directly under a values* directory."""
        dirname, basename = os.path.split(file_path)
        dirname = os.path.split(dirname)[1]
        return dirname.startswith("values") and basename.endswith(".xml")

    def consume(self, xml_path, input):
        """Remove duplicate resource definitions from the UTF-8 XML bytes in
        `input`, keeping only the last occurrence of each (name, product)
        pair.

        Returns the rewritten document as UTF-8 bytes, or `input` unchanged
        when no duplicates are found.
        """
        parser = xml.parsers.expat.ParserCreate("utf-8")
        # Note: Python 3 expat always delivers str to handlers, so the old
        # `parser.returns_unicode = True` toggle is gone.
        tracker = ResourceDefinitionLocator(parser)
        parser.StartElementHandler = tracker.start_element
        parser.EndElementHandler = tracker.end_element
        parser.Parse(input)

        # Treat the input as UTF-8 or else column numbers will be wrong.
        input_lines = input.decode("utf-8").splitlines(True)

        # Extract the duplicate resource definitions, ignoring the last
        # definition which will take precedence and be left intact.
        duplicates = []
        for res_name, entries in tracker.resource_definitions.items():
            if len(entries) > 1:
                duplicates += entries[:-1]

        # Sort the duplicates so that they are in order. That way we only do
        # one pass over the input.
        duplicates = sorted(duplicates, key=lambda x: x.start)

        last_line_no = 0
        last_col_no = 0
        output_lines = []
        current_line = ""
        for definition in duplicates:
            # BUG FIX: the format string referenced {3} but only two
            # arguments were supplied, raising IndexError on the first
            # duplicate. Restore the full file:line:col message used by the
            # original remove-duplicates.py script.
            print("{0}:{1}:{2}: removing duplicate resource '{3}'".format(
                xml_path, definition.start[0] + 1, definition.start[1],
                definition.name))

            if last_line_no < definition.start[0]:
                # The next definition is on a new line, so write what we have
                # to the output.
                new_line = current_line + input_lines[last_line_no][last_col_no:]
                if not new_line.isspace():
                    output_lines.append(new_line)
                current_line = ""
                last_col_no = 0
                last_line_no += 1

                # Copy all the lines up until this one.
                for line_to_copy in range(last_line_no, definition.start[0]):
                    output_lines.append(input_lines[line_to_copy])

            # Add to the existing line we're building, by including the prefix
            # of this line and skipping the lines and characters until the end
            # of this duplicate definition.
            last_line_no = definition.start[0]
            current_line += input_lines[last_line_no][last_col_no:definition.start[1]]
            last_line_no = definition.end[0]
            last_col_no = definition.end[1]

        # Flush whatever remains of the line we were building, then copy the
        # untouched tail of the document.
        new_line = current_line + input_lines[last_line_no][last_col_no:]
        if not new_line.isspace():
            output_lines.append(new_line)
        current_line = ""
        last_line_no += 1
        last_col_no = 0

        for line_to_copy in range(last_line_no, len(input_lines)):
            output_lines.append(input_lines[line_to_copy])

        if len(duplicates) > 0:
            print("deduped {0}".format(xml_path))
            return "".join(output_lines).encode("utf-8")
        return input


class Duplicate:
    """A small struct to maintain the positions of a duplicate resource
    definition."""

    def __init__(self, name, product, depth, start, end):
        self.name = name        # "type/name" resource identifier
        self.product = product  # value of the product attribute, or ""
        self.depth = depth      # element depth at which it was defined
        self.start = start      # (line, column) of the start tag, line 0-based
        self.end = end          # (line, column) just past the end tag


class ResourceDefinitionLocator:
    """Callback class for xml.parsers.expat which records resource
    definitions and their locations.
    """

    def __init__(self, parser):
        self.resource_definitions = {}
        self._parser = parser
        self._depth = 0
        self._current_resource = None

    def start_element(self, tag_name, attrs):
        self._depth += 1
        if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]:
            product = attrs.get("product", "")

            if tag_name == "item":
                resource_name = "{0}/{1}".format(attrs["type"], attrs["name"])
            else:
                resource_name = "{0}/{1}".format(tag_name, attrs["name"])
            # Expat line numbers are 1-based; store 0-based for list indexing.
            self._current_resource = Duplicate(
                resource_name,
                product,
                self._depth,
                (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber),
                None)

    def end_element(self, tag_name):
        if self._current_resource and self._depth == self._current_resource.depth:
            # Record the end position of the element, which is the length of
            # the name plus the </> symbols (len("</>") == 3).
            self._current_resource.end = (self._parser.CurrentLineNumber - 1,
                                          self._parser.CurrentColumnNumber + 3 + len(tag_name))
            key_name = "{0}:{1}".format(self._current_resource.name,
                                        self._current_resource.product)
            self.resource_definitions.setdefault(key_name, []).append(
                self._current_resource)
            self._current_resource = None
        self._depth -= 1
+""" + +import os.path +import re +import xml.parsers.expat + +class PositionalArgumentFixer: + def matches(self, file_path): + dirname, basename = os.path.split(file_path) + dirname = os.path.split(dirname)[1] + return dirname.startswith("values") and basename.endswith(".xml") + + def consume(self, xml_path, input): + parser = xml.parsers.expat.ParserCreate("utf-8") + locator = SubstitutionArgumentLocator(parser) + parser.returns_unicode = True + parser.StartElementHandler = locator.start_element + parser.EndElementHandler = locator.end_element + parser.CharacterDataHandler = locator.character_data + parser.Parse(input) + + if len(locator.arguments) > 0: + output = "" + last_index = 0 + for arg in locator.arguments: + output += input[last_index:arg.start] + output += "%{0}$".format(arg.number) + last_index = arg.start + 1 + output += input[last_index:] + print "fixed {0}".format(xml_path) + return output + return input + +class Argument: + def __init__(self, start, number): + self.start = start + self.number = number + +class SubstitutionArgumentLocator: + """Callback class for xml.parsers.expat which records locations of + substitution arguments in strings when there are more than 1 of + them in a single <string> tag (and they are not positional). 
+ """ + def __init__(self, parser): + self.arguments = [] + self._parser = parser + self._depth = 0 + self._within_string = False + self._current_arguments = [] + self._next_number = 1 + + def start_element(self, tag_name, attrs): + self._depth += 1 + if self._depth == 2 and tag_name == "string" and "translateable" not in attrs: + self._within_string = True + + def character_data(self, data): + if self._within_string: + for m in re.finditer("%[-#+ 0,(]?\d*[bBhHsScCdoxXeEfgGaAtTn]", data): + start, end = m.span() + self._current_arguments.append(\ + Argument(self._parser.CurrentByteIndex + start, self._next_number)) + self._next_number += 1 + + def end_element(self, tag_name): + if self._within_string and self._depth == 2: + if len(self._current_arguments) > 1: + self.arguments += self._current_arguments + self._current_arguments = [] + self._within_string = False + self._next_number = 1 + self._depth -= 1 diff --git a/tools/aapt2/tools/fix_resources.py b/tools/aapt2/tools/fix_resources.py new file mode 100644 index 000000000000..b6fcd915d9eb --- /dev/null +++ b/tools/aapt2/tools/fix_resources.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +""" +Scans each resource file in res/ applying various transformations +to fix invalid resource files. 
+""" + +import os +import os.path +import sys +import tempfile + +from consumers.duplicates import DuplicateRemover +from consumers.positional_arguments import PositionalArgumentFixer + +def do_it(res_path, consumers): + for file_path in enumerate_files(res_path): + eligible_consumers = filter(lambda c: c.matches(file_path), consumers) + if len(eligible_consumers) > 0: + print "checking {0} ...".format(file_path) + + original_contents = read_contents(file_path) + contents = original_contents + for c in eligible_consumers: + contents = c.consume(file_path, contents) + if original_contents != contents: + write_contents(file_path, contents) + +def enumerate_files(res_path): + """Enumerates all files in the resource directory.""" + values_directories = os.listdir(res_path) + values_directories = map(lambda f: os.path.join(res_path, f), values_directories) + all_files = [] + for dir in values_directories: + files = os.listdir(dir) + files = map(lambda f: os.path.join(dir, f), files) + for f in files: + yield f + +def read_contents(file_path): + """Reads the contents of file_path without decoding.""" + with open(file_path) as fin: + return fin.read() + +def write_contents(file_path, contents): + """Writes the bytes in contents to file_path by first writing to a temporary, then + renaming the temporary to file_path, ensuring a consistent write. 
+ """ + dirname, basename = os.path.split(file_path) + temp_name = "" + with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp: + temp_name = temp.name + temp.write(contents) + os.rename(temp.name, file_path) + +if __name__ == '__main__': + if len(sys.argv) < 2: + print >> sys.stderr, "please specify a path to a resource directory" + sys.exit(1) + + res_path = os.path.abspath(sys.argv[1]) + print "looking in {0} ...".format(res_path) + do_it(res_path, [DuplicateRemover(), PositionalArgumentFixer()]) diff --git a/tools/aapt2/tools/remove-duplicates.py b/tools/aapt2/tools/remove-duplicates.py deleted file mode 100644 index fb98bb73e9a4..000000000000 --- a/tools/aapt2/tools/remove-duplicates.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python - -import os -import os.path -import sys -import tempfile -import xml.parsers.expat - -""" -Scans each resource file in res/values/ looking for duplicates. -All but the last occurrence of resource definition are removed. -This creates no semantic changes, the resulting APK when built -should contain the same definition. -""" - -class Duplicate: - """A small struct to maintain the positions of a Duplicate resource definition.""" - def __init__(self, name, product, depth, start, end): - self.name = name - self.product = product - self.depth = depth - self.start = start - self.end = end - -class ResourceDefinitionLocator: - """Callback class for xml.parsers.expat which records resource definitions and their - locations. 
- """ - def __init__(self, parser): - self.resource_definitions = {} - self._parser = parser - self._depth = 0 - self._current_resource = None - - def start_element(self, tag_name, attrs): - self._depth += 1 - if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]: - resource_name = None - product = "" - try: - product = attrs["product"] - except KeyError: - pass - - if tag_name == "item": - resource_name = "{0}/{1}".format(attrs["type"], attrs["name"]) - else: - resource_name = "{0}/{1}".format(tag_name, attrs["name"]) - self._current_resource = Duplicate( - resource_name, - product, - self._depth, - (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber), - None) - - def end_element(self, tag_name): - if self._current_resource and self._depth == self._current_resource.depth: - # Record the end position of the element, which is the length of the name - # plus the </> symbols (len("</>") == 3). - self._current_resource.end = (self._parser.CurrentLineNumber - 1, - self._parser.CurrentColumnNumber + 3 + len(tag_name)) - key_name = "{0}:{1}".format(self._current_resource.name, - self._current_resource.product) - try: - self.resource_definitions[key_name] += [self._current_resource] - except KeyError: - self.resource_definitions[key_name] = [self._current_resource] - self._current_resource = None - self._depth -= 1 - -def remove_duplicates(xml_path): - """Reads the input file and generates an output file with any duplicate - resources removed, keeping the last occurring definition and removing - the others. The output is written to a temporary and then renamed - to the original file name. 
- """ - input = "" - with open(xml_path) as fin: - input = fin.read() - - parser = xml.parsers.expat.ParserCreate("utf-8") - parser.returns_unicode = True - tracker = ResourceDefinitionLocator(parser) - parser.StartElementHandler = tracker.start_element - parser.EndElementHandler = tracker.end_element - parser.Parse(input) - - # Treat the input as UTF-8 or else column numbers will be wrong. - input_lines = input.decode('utf-8').splitlines(True) - - # Extract the duplicate resource definitions, ignoring the last definition - # which will take precedence and be left intact. - duplicates = [] - for res_name, entries in tracker.resource_definitions.iteritems(): - if len(entries) > 1: - duplicates += entries[:-1] - - # Sort the duplicates so that they are in order. That way we only do one pass. - duplicates = sorted(duplicates, key=lambda x: x.start) - - last_line_no = 0 - last_col_no = 0 - output_lines = [] - current_line = "" - for definition in duplicates: - print "{0}:{1}:{2}: removing duplicate resource '{3}'".format( - xml_path, definition.start[0] + 1, definition.start[1], definition.name) - - if last_line_no < definition.start[0]: - # The next definition is on a new line, so write what we have - # to the output. - new_line = current_line + input_lines[last_line_no][last_col_no:] - if not new_line.isspace(): - output_lines.append(new_line) - current_line = "" - last_col_no = 0 - last_line_no += 1 - - # Copy all the lines up until this one. - for line_to_copy in xrange(last_line_no, definition.start[0]): - output_lines.append(input_lines[line_to_copy]) - - # Add to the existing line we're building, by including the prefix of this line - # and skipping the lines and characters until the end of this duplicate definition. 
- last_line_no = definition.start[0] - current_line += input_lines[last_line_no][last_col_no:definition.start[1]] - last_line_no = definition.end[0] - last_col_no = definition.end[1] - - new_line = current_line + input_lines[last_line_no][last_col_no:] - if not new_line.isspace(): - output_lines.append(new_line) - current_line = "" - last_line_no += 1 - last_col_no = 0 - - for line_to_copy in xrange(last_line_no, len(input_lines)): - output_lines.append(input_lines[line_to_copy]) - - if len(duplicates) > 0: - print "{0}: writing deduped copy...".format(xml_path) - - # Write the lines to a temporary file. - dirname, basename = os.path.split(xml_path) - temp_name = "" - with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp: - temp_name = temp.name - for line in output_lines: - temp.write(line.encode('utf-8')) - - # Now rename that file to the original so we have an atomic write that is consistent. - os.rename(temp.name, xml_path) - -def enumerate_files(res_path): - """Enumerates all files in the resource directory that are XML files and - within a values-* subdirectory. These types of files end up compiled - in the resources.arsc table of an APK. - """ - values_directories = os.listdir(res_path) - values_directories = filter(lambda f: f.startswith('values'), values_directories) - values_directories = map(lambda f: os.path.join(res_path, f), values_directories) - all_files = [] - for dir in values_directories: - files = os.listdir(dir) - files = filter(lambda f: f.endswith('.xml'), files) - files = map(lambda f: os.path.join(dir, f), files) - all_files += files - return all_files - -if __name__ == '__main__': - if len(sys.argv) < 2: - print >> sys.stderr, "please specify a path to a resource directory" - sys.exit(1) - - res_path = os.path.abspath(sys.argv[1]) - print "looking in {0} ...".format(res_path) - - for f in enumerate_files(res_path): - print "checking {0} ...".format(f) - remove_duplicates(f) - |