1 files changed, 570 insertions, 0 deletions
diff --git a/tools/checker.py b/tools/checker.py
new file mode 100755
index 0000000000..82a1e6bd22
--- /dev/null
+++ b/tools/checker.py
@@ -0,0 +1,570 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Checker is a testing tool which compiles a given test file and compares the
+# state of the control-flow graph before and after each optimization pass
+# against a set of assertions specified alongside the tests.
+#
+# Tests are written in Java, turned into DEX and compiled with the Optimizing
+# compiler. "Check lines" are comments in the Java file which begin with prefix
+# 'CHECK' followed by a pattern that the engine attempts to match in the
+# compiler-generated output.
+#
+# Assertions are tested in groups which correspond to the individual compiler
+# passes. Each group of check lines therefore must start with a 'CHECK-START'
+# header which specifies the output group it should be tested against. The group
+# name must exactly match one of the groups recognized in the output (they can
+# be listed with the '--list-groups' command-line flag).
+#
+# Check line patterns are treated as plain text rather than regular expressions
+# but are whitespace agnostic.
+#
+# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
+# curly brackets need to be used inside the body of the regex, they need to be
+# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
+# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
+#
+# Regex patterns can be named and referenced later. A new variable is defined
+# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
+# only valid within the scope of the defining group. Within a group they cannot
+# be redefined or used undefined.
+#
+# Example:
+#   The following assertions can be placed in a Java source file:
+#
+#   // CHECK-START: int MyClass.MyMethod() constant_folding (after)
+#   // CHECK:         [[ID:i[0-9]+]] IntConstant {{11|22}}
+#   // CHECK:                        Return [ [[ID]] ]
+#
+#   The engine will attempt to match the check lines against the output of the
+#   group named on the first line. Together they verify that the CFG after
+#   constant folding returns an integer constant with value either 11 or 22.
+#
+
+import argparse
+import os
+import re
+import shutil
+import sys
+import tempfile
+from subprocess import check_call
+
+class CommonEqualityMixin:
+  """Mixin for class equality as equality of the fields."""
+  def __eq__(self, other):
+    return (isinstance(other, self.__class__)
+           and self.__dict__ == other.__dict__)
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __repr__(self):
+    return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
+
+
+class CheckElement(CommonEqualityMixin):
+  """Single element of the check line."""
+
+  class Variant(object):
+    """Supported language constructs."""
+    Text, Pattern, VarRef, VarDef = range(4)
+
+  def __init__(self, variant, name, pattern):
+    self.variant = variant
+    self.name = name
+    self.pattern = pattern
+
+  @staticmethod
+  def parseText(text):
+    return CheckElement(CheckElement.Variant.Text, None, re.escape(text))
+
+  @staticmethod
+  def parsePattern(patternElem):
+    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:len(patternElem)-2])
+
+  @staticmethod
+  def parseVariable(varElem):
+    colonPos = varElem.find(":")
+    if colonPos == -1:
+      # Variable reference
+      name = varElem[2:len(varElem)-2]
+      return CheckElement(CheckElement.Variant.VarRef, name, None)
+    else:
+      # Variable definition
+      name = varElem[2:colonPos]
+      body = varElem[colonPos+1:len(varElem)-2]
+      return CheckElement(CheckElement.Variant.VarDef, name, body)
+
+
+class CheckLine(CommonEqualityMixin):
+  """Representation of a single assertion in the check file formed of one or
+     more regex elements. Matching against an output line is successful only
+     if all regex elements can be matched in the given order."""
+
+  def __init__(self, lineContent, lineNo=-1):
+    lineContent = lineContent.strip()
+
+    self.lineNo = lineNo
+    self.content = lineContent
+
+    self.lineParts = self.__parse(lineContent)
+    if not self.lineParts:
+      raise Exception("Empty check line")
+
+  # Returns True if the given Match object was at the beginning of the line.
+  def __isMatchAtStart(self, match):
+    return (match is not None) and (match.start() == 0)
+
+  # Takes in a list of Match objects and returns the minimal start point among
+  # them. If there aren't any successful matches it returns the length of
+  # the searched string.
+  def __firstMatch(self, matches, string):
+    starts = map(lambda m: len(string) if m is None else m.start(), matches)
+    return min(starts)
+
+  # Returns the regex for finding a regex pattern in the check line.
+  def __getPatternRegex(self):
+    rStartSym = "\{\{"
+    rEndSym = "\}\}"
+    rBody = ".+?"
+    return rStartSym + rBody + rEndSym
+
+  # Returns the regex for finding a variable use in the check line.
+  def __getVariableRegex(self):
+    rStartSym = "\[\["
+    rEndSym = "\]\]"
+    rStartOptional = "("
+    rEndOptional = ")?"
+    rName = "[a-zA-Z][a-zA-Z0-9]*"
+    rSeparator = ":"
+    rBody = ".+?"
+    return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym
+
+  # This method parses the content of a check line stripped of the initial
+  # comment symbol and the CHECK keyword.
+  def __parse(self, line):
+    lineParts = []
+    # Loop as long as there is something to parse.
+    while line:
+      # Search for the nearest occurrence of the special markers.
+      matchWhitespace = re.search("\s+", line)
+      matchPattern = re.search(self.__getPatternRegex(), line)
+      matchVariable = re.search(self.__getVariableRegex(), line)
+
+      # If one of the above was identified at the current position, extract them
+      # from the line, parse them and add to the list of line parts.
+      if self.__isMatchAtStart(matchWhitespace):
+        # We want to be whitespace-agnostic so whenever a check line contains
+        # a whitespace, we add a regex pattern for an arbitrary non-zero number
+        # of whitespaces.
+        line = line[matchWhitespace.end():]
+        lineParts.append(CheckElement.parsePattern("{{\s+}}"))
+      elif self.__isMatchAtStart(matchPattern):
+        pattern = line[0:matchPattern.end()]
+        line = line[matchPattern.end():]
+        lineParts.append(CheckElement.parsePattern(pattern))
+      elif self.__isMatchAtStart(matchVariable):
+        var = line[0:matchVariable.end()]
+        line = line[matchVariable.end():]
+        lineParts.append(CheckElement.parseVariable(var))
+      else:
+        # If we're not currently looking at a special marker, this is a plain
+        # text match all the way until the first special marker (or the end
+        # of the line).
+        firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
+        text = line[0:firstMatch]
+        line = line[firstMatch:]
+        lineParts.append(CheckElement.parseText(text))
+    return lineParts
+
+  # Returns the regex pattern to be matched in the output line. Variable
+  # references are substituted with their current values provided in the
+  # 'varState' argument.
+  # An exception is raised if a referenced variable is undefined.
+  def __generatePattern(self, linePart, varState):
+    if linePart.variant == CheckElement.Variant.VarRef:
+      try:
+        return re.escape(varState[linePart.name])
+      except KeyError:
+        raise Exception("Use of undefined variable '" + linePart.name + "' " +
+                        "(line " + str(self.lineNo))
+    else:
+      return linePart.pattern
+
+  # Attempts to match the check line against a line from the output file with
+  # the given initial variable values. It returns the new variable state if
+  # successful and None otherwise.
+  def match(self, outputLine, initialVarState):
+    initialSearchFrom = 0
+    initialPattern = self.__generatePattern(self.lineParts[0], initialVarState)
+    while True:
+      # Search for the first element on the regex parts list. This will mark
+      # the point on the line from which we will attempt to match the rest of
+      # the check pattern. If this iteration produces only a partial match,
+      # the next iteration will start searching further in the output.
+      firstMatch = re.search(initialPattern, outputLine[initialSearchFrom:])
+      if firstMatch is None:
+        return None
+      matchStart = initialSearchFrom + firstMatch.start()
+      initialSearchFrom += firstMatch.start() + 1
+
+      # Do the full matching on a shadow copy of the variable state. If the
+      # matching fails half-way, we will not need to revert the state.
+      varState = dict(initialVarState)
+
+      # Now try to parse all of the parts of the check line in the right order.
+      # Variable values are updated on-the-fly, meaning that a variable can
+      # be referenced immediately after its definition.
+      fullyMatched = True
+      for part in self.lineParts:
+        pattern = self.__generatePattern(part, varState)
+        match = re.match(pattern, outputLine[matchStart:])
+        if match is None:
+          fullyMatched = False
+          break
+        matchEnd = matchStart + match.end()
+        if part.variant == CheckElement.Variant.VarDef:
+          if part.name in varState:
+            raise Exception("Redefinition of variable '" + part.name + "'" +
+                            " (line " + str(self.lineNo) + ")")
+          varState[part.name] = outputLine[matchStart:matchEnd]
+        matchStart = matchEnd
+
+      # Return the new variable state if all parts were successfully matched.
+      # Otherwise loop and try to find another start point on the same line.
+      if fullyMatched:
+        return varState
+
+
+class CheckGroup(CommonEqualityMixin):
+  """Represents a named collection of check lines which are to be matched
+     against an output group of the same name."""
+
+  def __init__(self, name, lines):
+    if name:
+      self.name = name
+    else:
+      raise Exception("Check group does not have a name")
+    if lines:
+      self.lines = lines
+    else:
+      raise Exception("Check group " + self.name + " does not have a body")
+
+  def __headAndTail(self, list):
+    return list[0], list[1:]
+
+  # The driver of matching inside a group. It simultaneously reads lines from
+  # the output and check groups and attempts to match them against each other
+  # in the correct order.
+  def match(self, outputGroup):
+    readOutputLines = 0
+    lastMatch = 0
+
+    # Check and output lines which remain to be matched.
+    checkLines = self.lines
+    outputLines = outputGroup.body
+    varState = {}
+
+    # Retrieve the next check line.
+    while checkLines:
+      checkLine, checkLines = self.__headAndTail(checkLines)
+      foundMatch = False
+
+      # Retrieve the next output line.
+      while outputLines:
+        outputLine, outputLines = self.__headAndTail(outputLines)
+        readOutputLines += 1
+
+        # Try to match the current lines against each other. If successful,
+        # save the new state of variables and continue to the next check line.
+        newVarState = checkLine.match(outputLine, varState)
+        if newVarState is not None:
+          varState = newVarState
+          lastMatch = readOutputLines
+          foundMatch = True
+          break
+      if not foundMatch:
+        raise Exception("Could not match check line \"" + checkLine.content + "\" from line " +
+                        str(lastMatch+1) + " of the output. [vars=" + str(varState) + "]")
+
+  @staticmethod
+  def parse(name, lines):
+    return CheckGroup(name, list(map(lambda line: CheckLine(line), lines)))
+
+
+class OutputGroup(CommonEqualityMixin):
+  """Represents a named part of the test output against which a check group of
+     the same name is to be matched."""
+
+  def __init__(self, name, body):
+    if name:
+      self.name = name
+    else:
+      raise Exception("Output group does not have a name")
+    if body:
+      self.body = body
+    else:
+      raise Exception("Output group " + self.name + " does not have a body")
+
+
+class FileSplitMixin(object):
+  """Mixin for representing text files which need to be split into smaller
+     chunks before being parsed."""
+
+  def _parseStream(self, stream):
+    lineNo = 0
+    allGroups = []
+    currentGroup = None
+
+    for line in stream:
+      lineNo += 1
+      line = line.strip()
+      if not line:
+        continue
+
+      # Let the child class process the line and return information about it.
+      # The _processLine method can modify the content of the line (or delete it
+      # entirely) and specify whether it starts a new group.
+      processedLine, newGroupName = self._processLine(line, lineNo)
+      if newGroupName is not None:
+        currentGroup = (newGroupName, [])
+        allGroups.append(currentGroup)
+      if processedLine is not None:
+        currentGroup[1].append(processedLine)
+
+    # Finally, take the generated line groups and let the child class process
+    # each one before storing the final outcome.
+    return list(map(lambda group: self._processGroup(group[0], group[1]), allGroups))
+
+
+class CheckFile(FileSplitMixin):
+  """Collection of check groups extracted from the input test file."""
+
+  def __init__(self, prefix, checkStream):
+    self.prefix = prefix
+    self.groups = self._parseStream(checkStream)
+
+  # Attempts to parse a check line. The regex searches for a comment symbol
+  # followed by the CHECK keyword, given attribute and a colon at the very
+  # beginning of the line. Whitespaces are ignored.
+  def _extractLine(self, prefix, line):
+    ignoreWhitespace = "\s*"
+    commentSymbols = ["//", "#"]
+    prefixRegex = ignoreWhitespace + \
+                  "(" + "|".join(commentSymbols) + ")" + \
+                  ignoreWhitespace + \
+                  prefix + ":"
+
+    # The 'match' function succeeds only if the pattern is matched at the
+    # beginning of the line.
+    match = re.match(prefixRegex, line)
+    if match is not None:
+      return line[match.end():].strip()
+    else:
+      return None
+
+  def _processLine(self, line, lineNo):
+    startLine = self._extractLine(self.prefix + "-START", line)
+    if startLine is not None:
+      # Line starts with the CHECK-START keyword, start a new group
+      return (None, startLine)
+    else:
+      # Otherwise try to parse it as a standard CHECK line. If unsuccessful,
+      # _extractLine will return None and the line will be ignored.
+      return (self._extractLine(self.prefix, line), None)
+
+  def _exceptionLineOutsideGroup(self, line, lineNo):
+    raise Exception("Check file line lies outside a group (line " + str(lineNo) + ")")
+
+  def _processGroup(self, name, lines):
+    return CheckGroup.parse(name, lines)
+
+  def match(self, outputFile, printInfo=False):
+    for checkGroup in self.groups:
+      # TODO: Currently does not handle multiple occurrences of the same group
+      # name, e.g. when a pass is run multiple times. It will always try to
+      # match a check group against the first output group of the same name.
+      outputGroup = outputFile.findGroup(checkGroup.name)
+      if outputGroup is None:
+        raise Exception("Group " + checkGroup.name + " not found in the output")
+      if printInfo:
+        print("TEST " + checkGroup.name + "... ", end="", flush=True)
+      try:
+        checkGroup.match(outputGroup)
+        if printInfo:
+          print("PASSED")
+      except Exception as e:
+        if printInfo:
+          print("FAILED!")
+        raise e
+
+
+class OutputFile(FileSplitMixin):
+  """Representation of the output generated by the test and split into groups
+     within which the checks are performed.
+
+     C1visualizer format is parsed with a state machine which differentiates
+     between the 'compilation' and 'cfg' blocks. The former marks the beginning
+     of a method. It is parsed for the method's name but otherwise ignored. Each
+     subsequent CFG block represents one stage of the compilation pipeline and
+     is parsed into an output group named "<method name> <pass name>".
+     """
+
+  class ParsingState:
+    OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)
+
+  def __init__(self, outputStream):
+    # Initialize the state machine
+    self.lastMethodName = None
+    self.state = OutputFile.ParsingState.OutsideBlock
+    self.groups = self._parseStream(outputStream)
+
+  def _processLine(self, line, lineNo):
+    if self.state == OutputFile.ParsingState.StartingCfgBlock:
+      # Previous line started a new 'cfg' block which means that this one must
+      # contain the name of the pass (this is enforced by C1visualizer).
+      if re.match("name\s+\"[^\"]+\"", line):
+        # Extract the pass name, prepend it with the name of the method and
+        # return as the beginning of a new group.
+        self.state = OutputFile.ParsingState.InsideCfgBlock
+        return (None, self.lastMethodName + " " + line.split("\"")[1])
+      else:
+        raise Exception("Expected group name in output file (line " + str(lineNo) + ")")
+
+    elif self.state == OutputFile.ParsingState.InsideCfgBlock:
+      if line == "end_cfg":
+        self.state = OutputFile.ParsingState.OutsideBlock
+        return (None, None)
+      else:
+        return (line, None)
+
+    elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
+      # Search for the method's name. Format: method "<name>"
+      if re.match("method\s+\"[^\"]+\"", line):
+        self.lastMethodName = line.split("\"")[1]
+      elif line == "end_compilation":
+        self.state = OutputFile.ParsingState.OutsideBlock
+      return (None, None)
+
+    else:  # self.state == OutputFile.ParsingState.OutsideBlock:
+      if line == "begin_cfg":
+        # The line starts a new group but we'll wait until the next line from
+        # which we can extract the name of the pass.
+        if self.lastMethodName is None:
+          raise Exception("Output contains a pass without a method header" +
+                          " (line " + str(lineNo) + ")")
+        self.state = OutputFile.ParsingState.StartingCfgBlock
+        return (None, None)
+      elif line == "begin_compilation":
+        self.state = OutputFile.ParsingState.InsideCompilationBlock
+        return (None, None)
+      else:
+        raise Exception("Output line lies outside a group (line " + str(lineNo) + ")")
+
+  def _processGroup(self, name, lines):
+    return OutputGroup(name, lines)
+
+  def findGroup(self, name):
+    for group in self.groups:
+      if group.name == name:
+        return group
+    return None
+
+
+def ParseArguments():
+  parser = argparse.ArgumentParser()
+  parser.add_argument("test_file", help="the source of the test with checking annotations")
+  parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX",
+                      help="prefix of checks in the test file (default: CHECK)")
+  parser.add_argument("--list-groups", dest="list_groups", action="store_true",
+                      help="print a list of all groups found in the test output")
+  parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP",
+                      help="print the contents of an output group")
+  return parser.parse_args()
+
+
+class cd:
+  """Helper class which temporarily changes the working directory."""
+
+  def __init__(self, newPath):
+    self.newPath = newPath
+
+  def __enter__(self):
+    self.savedPath = os.getcwd()
+    os.chdir(self.newPath)
+
+  def __exit__(self, etype, value, traceback):
+    os.chdir(self.savedPath)
+
+
+def CompileTest(inputFile, tempFolder):
+  classFolder = tempFolder + "/classes"
+  dexFile = tempFolder + "/test.dex"
+  oatFile = tempFolder + "/test.oat"
+  outputFile = tempFolder + "/art.cfg"
+  os.makedirs(classFolder)
+
+  # Build a DEX from the source file. We pass "--no-optimize" to dx to avoid
+  # interference with its optimizations.
+  check_call(["javac", "-d", classFolder, inputFile])
+  check_call(["dx", "--dex", "--no-optimize", "--output=" + dexFile, classFolder])
+
+  # Run dex2oat and export the HGraph. The output is stored into ${PWD}/art.cfg.
+  with cd(tempFolder):
+    check_call(["dex2oat", "-j1", "--dump-passes", "--compiler-backend=Optimizing",
+                "--android-root=" + os.environ["ANDROID_HOST_OUT"],
+                "--boot-image=" + os.environ["ANDROID_HOST_OUT"] + "/framework/core-optimizing.art",
+                "--runtime-arg", "-Xnorelocate", "--dex-file=" + dexFile, "--oat-file=" + oatFile])
+
+  return outputFile
+
+
+def ListGroups(outputFilename):
+  outputFile = OutputFile(open(outputFilename, "r"))
+  for group in outputFile.groups:
+    print(group.name)
+
+
+def DumpGroup(outputFilename, groupName):
+  outputFile = OutputFile(open(outputFilename, "r"))
+  group = outputFile.findGroup(groupName)
+  if group:
+    print("\n".join(group.body))
+  else:
+    raise Exception("Check group " + groupName + " not found in the output")
+
+
+def RunChecks(checkPrefix, checkFilename, outputFilename):
+  checkFile = CheckFile(checkPrefix, open(checkFilename, "r"))
+  outputFile = OutputFile(open(outputFilename, "r"))
+  checkFile.match(outputFile, True)
+
+
+if __name__ == "__main__":
+  args = ParseArguments()
+  tempFolder = tempfile.mkdtemp()
+
+  try:
+    outputFile = CompileTest(args.test_file, tempFolder)
+    if args.list_groups:
+      ListGroups(outputFile)
+    elif args.dump_group:
+      DumpGroup(outputFile, args.dump_group)
+    else:
+      RunChecks(args.check_prefix, args.test_file, outputFile)
+  finally:
+    shutil.rmtree(tempFolder)