diff options

-rw-r--r--  tools/aapt2/tools/consumers/__init__.py              |   0
-rw-r--r--  tools/aapt2/tools/consumers/duplicates.py            | 132
-rw-r--r--  tools/aapt2/tools/consumers/positional_arguments.py  |  77
-rw-r--r--  tools/aapt2/tools/fix_resources.py                   |  63
-rw-r--r--  tools/aapt2/tools/remove-duplicates.py               | 181
5 files changed, 272 insertions(+), 181 deletions(-)
"""
Looks for duplicate resource definitions and removes all but the last one.
"""

import os.path
import xml.parsers.expat


class DuplicateRemover:
    """Consumer that strips all but the last definition of each resource."""

    def matches(self, file_path):
        """Return True for XML files directly under a values* directory."""
        dirname, basename = os.path.split(file_path)
        dirname = os.path.split(dirname)[1]
        return dirname.startswith("values") and basename.endswith(".xml")

    def consume(self, xml_path, input):
        """Remove duplicate resource definitions from the UTF-8 XML bytes in
        `input`, keeping only the last occurrence of each (name, product)
        pair.

        Returns the rewritten document as UTF-8 bytes, or `input` unchanged
        when no duplicates are found.
        """
        parser = xml.parsers.expat.ParserCreate("utf-8")
        # Note: Python 3 expat always delivers str to handlers, so the old
        # `parser.returns_unicode = True` toggle is gone.
        tracker = ResourceDefinitionLocator(parser)
        parser.StartElementHandler = tracker.start_element
        parser.EndElementHandler = tracker.end_element
        parser.Parse(input)

        # Treat the input as UTF-8 or else column numbers will be wrong.
        input_lines = input.decode("utf-8").splitlines(True)

        # Extract the duplicate resource definitions, ignoring the last
        # definition which will take precedence and be left intact.
        duplicates = []
        for res_name, entries in tracker.resource_definitions.items():
            if len(entries) > 1:
                duplicates += entries[:-1]

        # Sort the duplicates so that they are in order. That way we only do
        # one pass over the input.
        duplicates = sorted(duplicates, key=lambda x: x.start)

        last_line_no = 0
        last_col_no = 0
        output_lines = []
        current_line = ""
        for definition in duplicates:
            # BUG FIX: the format string referenced {3} but only two
            # arguments were supplied, raising IndexError on the first
            # duplicate. Restore the full file:line:col message used by the
            # original remove-duplicates.py script.
            print("{0}:{1}:{2}: removing duplicate resource '{3}'".format(
                xml_path, definition.start[0] + 1, definition.start[1],
                definition.name))

            if last_line_no < definition.start[0]:
                # The next definition is on a new line, so write what we have
                # to the output.
                new_line = current_line + input_lines[last_line_no][last_col_no:]
                if not new_line.isspace():
                    output_lines.append(new_line)
                current_line = ""
                last_col_no = 0
                last_line_no += 1

                # Copy all the lines up until this one.
                for line_to_copy in range(last_line_no, definition.start[0]):
                    output_lines.append(input_lines[line_to_copy])

            # Add to the existing line we're building, by including the prefix
            # of this line and skipping the lines and characters until the end
            # of this duplicate definition.
            last_line_no = definition.start[0]
            current_line += input_lines[last_line_no][last_col_no:definition.start[1]]
            last_line_no = definition.end[0]
            last_col_no = definition.end[1]

        # Flush whatever remains of the line we were building, then copy the
        # untouched tail of the document.
        new_line = current_line + input_lines[last_line_no][last_col_no:]
        if not new_line.isspace():
            output_lines.append(new_line)
        current_line = ""
        last_line_no += 1
        last_col_no = 0

        for line_to_copy in range(last_line_no, len(input_lines)):
            output_lines.append(input_lines[line_to_copy])

        if len(duplicates) > 0:
            print("deduped {0}".format(xml_path))
            return "".join(output_lines).encode("utf-8")
        return input


class Duplicate:
    """A small struct to maintain the positions of a duplicate resource
    definition."""

    def __init__(self, name, product, depth, start, end):
        self.name = name        # "type/name" resource identifier
        self.product = product  # value of the product attribute, or ""
        self.depth = depth      # element depth at which it was defined
        self.start = start      # (line, column) of the start tag, line 0-based
        self.end = end          # (line, column) just past the end tag


class ResourceDefinitionLocator:
    """Callback class for xml.parsers.expat which records resource
    definitions and their locations.
    """

    def __init__(self, parser):
        self.resource_definitions = {}
        self._parser = parser
        self._depth = 0
        self._current_resource = None

    def start_element(self, tag_name, attrs):
        self._depth += 1
        if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]:
            product = attrs.get("product", "")

            if tag_name == "item":
                resource_name = "{0}/{1}".format(attrs["type"], attrs["name"])
            else:
                resource_name = "{0}/{1}".format(tag_name, attrs["name"])
            # Expat line numbers are 1-based; store 0-based for list indexing.
            self._current_resource = Duplicate(
                resource_name,
                product,
                self._depth,
                (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber),
                None)

    def end_element(self, tag_name):
        if self._current_resource and self._depth == self._current_resource.depth:
            # Record the end position of the element, which is the length of
            # the name plus the </> symbols (len("</>") == 3).
            self._current_resource.end = (self._parser.CurrentLineNumber - 1,
                                          self._parser.CurrentColumnNumber + 3 + len(tag_name))
            key_name = "{0}:{1}".format(self._current_resource.name,
                                        self._current_resource.product)
            self.resource_definitions.setdefault(key_name, []).append(
                self._current_resource)
            self._current_resource = None
        self._depth -= 1
+""" + +import os.path +import re +import xml.parsers.expat + +class PositionalArgumentFixer: + def matches(self, file_path): + dirname, basename = os.path.split(file_path) + dirname = os.path.split(dirname)[1] + return dirname.startswith("values") and basename.endswith(".xml") + + def consume(self, xml_path, input): + parser = xml.parsers.expat.ParserCreate("utf-8") + locator = SubstitutionArgumentLocator(parser) + parser.returns_unicode = True + parser.StartElementHandler = locator.start_element + parser.EndElementHandler = locator.end_element + parser.CharacterDataHandler = locator.character_data + parser.Parse(input) + + if len(locator.arguments) > 0: + output = "" + last_index = 0 + for arg in locator.arguments: + output += input[last_index:arg.start] + output += "%{0}$".format(arg.number) + last_index = arg.start + 1 + output += input[last_index:] + print "fixed {0}".format(xml_path) + return output + return input + +class Argument: + def __init__(self, start, number): + self.start = start + self.number = number + +class SubstitutionArgumentLocator: + """Callback class for xml.parsers.expat which records locations of + substitution arguments in strings when there are more than 1 of + them in a single <string> tag (and they are not positional). 
+ """ + def __init__(self, parser): + self.arguments = [] + self._parser = parser + self._depth = 0 + self._within_string = False + self._current_arguments = [] + self._next_number = 1 + + def start_element(self, tag_name, attrs): + self._depth += 1 + if self._depth == 2 and tag_name == "string" and "translateable" not in attrs: + self._within_string = True + + def character_data(self, data): + if self._within_string: + for m in re.finditer("%[-#+ 0,(]?\d*[bBhHsScCdoxXeEfgGaAtTn]", data): + start, end = m.span() + self._current_arguments.append(\ + Argument(self._parser.CurrentByteIndex + start, self._next_number)) + self._next_number += 1 + + def end_element(self, tag_name): + if self._within_string and self._depth == 2: + if len(self._current_arguments) > 1: + self.arguments += self._current_arguments + self._current_arguments = [] + self._within_string = False + self._next_number = 1 + self._depth -= 1 diff --git a/tools/aapt2/tools/fix_resources.py b/tools/aapt2/tools/fix_resources.py new file mode 100644 index 000000000000..b6fcd915d9eb --- /dev/null +++ b/tools/aapt2/tools/fix_resources.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +""" +Scans each resource file in res/ applying various transformations +to fix invalid resource files. 
+""" + +import os +import os.path +import sys +import tempfile + +from consumers.duplicates import DuplicateRemover +from consumers.positional_arguments import PositionalArgumentFixer + +def do_it(res_path, consumers): + for file_path in enumerate_files(res_path): + eligible_consumers = filter(lambda c: c.matches(file_path), consumers) + if len(eligible_consumers) > 0: + print "checking {0} ...".format(file_path) + + original_contents = read_contents(file_path) + contents = original_contents + for c in eligible_consumers: + contents = c.consume(file_path, contents) + if original_contents != contents: + write_contents(file_path, contents) + +def enumerate_files(res_path): + """Enumerates all files in the resource directory.""" + values_directories = os.listdir(res_path) + values_directories = map(lambda f: os.path.join(res_path, f), values_directories) + all_files = [] + for dir in values_directories: + files = os.listdir(dir) + files = map(lambda f: os.path.join(dir, f), files) + for f in files: + yield f + +def read_contents(file_path): + """Reads the contents of file_path without decoding.""" + with open(file_path) as fin: + return fin.read() + +def write_contents(file_path, contents): + """Writes the bytes in contents to file_path by first writing to a temporary, then + renaming the temporary to file_path, ensuring a consistent write. 
+ """ + dirname, basename = os.path.split(file_path) + temp_name = "" + with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp: + temp_name = temp.name + temp.write(contents) + os.rename(temp.name, file_path) + +if __name__ == '__main__': + if len(sys.argv) < 2: + print >> sys.stderr, "please specify a path to a resource directory" + sys.exit(1) + + res_path = os.path.abspath(sys.argv[1]) + print "looking in {0} ...".format(res_path) + do_it(res_path, [DuplicateRemover(), PositionalArgumentFixer()]) diff --git a/tools/aapt2/tools/remove-duplicates.py b/tools/aapt2/tools/remove-duplicates.py deleted file mode 100644 index fb98bb73e9a4..000000000000 --- a/tools/aapt2/tools/remove-duplicates.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python - -import os -import os.path -import sys -import tempfile -import xml.parsers.expat - -""" -Scans each resource file in res/values/ looking for duplicates. -All but the last occurrence of resource definition are removed. -This creates no semantic changes, the resulting APK when built -should contain the same definition. -""" - -class Duplicate: - """A small struct to maintain the positions of a Duplicate resource definition.""" - def __init__(self, name, product, depth, start, end): - self.name = name - self.product = product - self.depth = depth - self.start = start - self.end = end - -class ResourceDefinitionLocator: - """Callback class for xml.parsers.expat which records resource definitions and their - locations. 
- """ - def __init__(self, parser): - self.resource_definitions = {} - self._parser = parser - self._depth = 0 - self._current_resource = None - - def start_element(self, tag_name, attrs): - self._depth += 1 - if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]: - resource_name = None - product = "" - try: - product = attrs["product"] - except KeyError: - pass - - if tag_name == "item": - resource_name = "{0}/{1}".format(attrs["type"], attrs["name"]) - else: - resource_name = "{0}/{1}".format(tag_name, attrs["name"]) - self._current_resource = Duplicate( - resource_name, - product, - self._depth, - (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber), - None) - - def end_element(self, tag_name): - if self._current_resource and self._depth == self._current_resource.depth: - # Record the end position of the element, which is the length of the name - # plus the </> symbols (len("</>") == 3). - self._current_resource.end = (self._parser.CurrentLineNumber - 1, - self._parser.CurrentColumnNumber + 3 + len(tag_name)) - key_name = "{0}:{1}".format(self._current_resource.name, - self._current_resource.product) - try: - self.resource_definitions[key_name] += [self._current_resource] - except KeyError: - self.resource_definitions[key_name] = [self._current_resource] - self._current_resource = None - self._depth -= 1 - -def remove_duplicates(xml_path): - """Reads the input file and generates an output file with any duplicate - resources removed, keeping the last occurring definition and removing - the others. The output is written to a temporary and then renamed - to the original file name. 
- """ - input = "" - with open(xml_path) as fin: - input = fin.read() - - parser = xml.parsers.expat.ParserCreate("utf-8") - parser.returns_unicode = True - tracker = ResourceDefinitionLocator(parser) - parser.StartElementHandler = tracker.start_element - parser.EndElementHandler = tracker.end_element - parser.Parse(input) - - # Treat the input as UTF-8 or else column numbers will be wrong. - input_lines = input.decode('utf-8').splitlines(True) - - # Extract the duplicate resource definitions, ignoring the last definition - # which will take precedence and be left intact. - duplicates = [] - for res_name, entries in tracker.resource_definitions.iteritems(): - if len(entries) > 1: - duplicates += entries[:-1] - - # Sort the duplicates so that they are in order. That way we only do one pass. - duplicates = sorted(duplicates, key=lambda x: x.start) - - last_line_no = 0 - last_col_no = 0 - output_lines = [] - current_line = "" - for definition in duplicates: - print "{0}:{1}:{2}: removing duplicate resource '{3}'".format( - xml_path, definition.start[0] + 1, definition.start[1], definition.name) - - if last_line_no < definition.start[0]: - # The next definition is on a new line, so write what we have - # to the output. - new_line = current_line + input_lines[last_line_no][last_col_no:] - if not new_line.isspace(): - output_lines.append(new_line) - current_line = "" - last_col_no = 0 - last_line_no += 1 - - # Copy all the lines up until this one. - for line_to_copy in xrange(last_line_no, definition.start[0]): - output_lines.append(input_lines[line_to_copy]) - - # Add to the existing line we're building, by including the prefix of this line - # and skipping the lines and characters until the end of this duplicate definition. 
- last_line_no = definition.start[0] - current_line += input_lines[last_line_no][last_col_no:definition.start[1]] - last_line_no = definition.end[0] - last_col_no = definition.end[1] - - new_line = current_line + input_lines[last_line_no][last_col_no:] - if not new_line.isspace(): - output_lines.append(new_line) - current_line = "" - last_line_no += 1 - last_col_no = 0 - - for line_to_copy in xrange(last_line_no, len(input_lines)): - output_lines.append(input_lines[line_to_copy]) - - if len(duplicates) > 0: - print "{0}: writing deduped copy...".format(xml_path) - - # Write the lines to a temporary file. - dirname, basename = os.path.split(xml_path) - temp_name = "" - with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp: - temp_name = temp.name - for line in output_lines: - temp.write(line.encode('utf-8')) - - # Now rename that file to the original so we have an atomic write that is consistent. - os.rename(temp.name, xml_path) - -def enumerate_files(res_path): - """Enumerates all files in the resource directory that are XML files and - within a values-* subdirectory. These types of files end up compiled - in the resources.arsc table of an APK. - """ - values_directories = os.listdir(res_path) - values_directories = filter(lambda f: f.startswith('values'), values_directories) - values_directories = map(lambda f: os.path.join(res_path, f), values_directories) - all_files = [] - for dir in values_directories: - files = os.listdir(dir) - files = filter(lambda f: f.endswith('.xml'), files) - files = map(lambda f: os.path.join(dir, f), files) - all_files += files - return all_files - -if __name__ == '__main__': - if len(sys.argv) < 2: - print >> sys.stderr, "please specify a path to a resource directory" - sys.exit(1) - - res_path = os.path.abspath(sys.argv[1]) - print "looking in {0} ...".format(res_path) - - for f in enumerate_files(res_path): - print "checking {0} ...".format(f) - remove_duplicates(f) - |