diff options
Diffstat (limited to 'tools/checker.py')
| -rwxr-xr-x | tools/checker.py | 570 | 
1 files changed, 570 insertions, 0 deletions
#!/usr/bin/env python3
#
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Checker is a testing tool which compiles a given test file and compares the
# state of the control-flow graph before and after each optimization pass
# against a set of assertions specified alongside the tests.
#
# Tests are written in Java, turned into DEX and compiled with the Optimizing
# compiler. "Check lines" are comments in the Java file which begin with prefix
# 'CHECK' followed by a pattern that the engine attempts to match in the
# compiler-generated output.
#
# Assertions are tested in groups which correspond to the individual compiler
# passes. Each group of check lines therefore must start with a 'CHECK-START'
# header which specifies the output group it should be tested against. The group
# name must exactly match one of the groups recognized in the output (they can
# be listed with the '--list-groups' command-line flag).
#
# Check line patterns are treated as plain text rather than regular expressions
# but are whitespace agnostic.
#
# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
# curly brackets need to be used inside the body of the regex, they need to be
# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
#
# Regex patterns can be named and referenced later. A new variable is defined
# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
# only valid within the scope of the defining group. Within a group they cannot
# be redefined or used undefined.
#
# Example:
#   The following assertions can be placed in a Java source file:
#
#   // CHECK-START: int MyClass.MyMethod() constant_folding (after)
#   // CHECK:         [[ID:i[0-9]+]] IntConstant {{11|22}}
#   // CHECK:                        Return [ [[ID]] ]
#
#   The engine will attempt to match the check lines against the output of the
#   group named on the first line. Together they verify that the CFG after
#   constant folding returns an integer constant with value either 11 or 22.
#

import argparse
import os
import re
import shutil
import sys
import tempfile
from subprocess import check_call


class CommonEqualityMixin:
  """Mixin for class equality as equality of the fields."""

  def __eq__(self, other):
    return (isinstance(other, self.__class__)
           and self.__dict__ == other.__dict__)

  def __ne__(self, other):
    return not self.__eq__(other)

  def __repr__(self):
    return "<%s: %s>" % (type(self).__name__, str(self.__dict__))


class CheckElement(CommonEqualityMixin):
  """Single element of the check line."""

  class Variant(object):
    """Supported language constructs."""
    Text, Pattern, VarRef, VarDef = range(4)

  def __init__(self, variant, name, pattern):
    # 'variant' is one of the Variant constants. 'name' is only set for
    # variable definitions/references, 'pattern' is the regex to match (None
    # for variable references, whose pattern depends on the variable state).
    self.variant = variant
    self.name = name
    self.pattern = pattern

  @staticmethod
  def parseText(text):
    """Creates a Text element matching 'text' literally."""
    return CheckElement(CheckElement.Variant.Text, None, re.escape(text))

  @staticmethod
  def parsePattern(patternElem):
    """Creates a Pattern element from a '{{regex}}' string."""
    # Strip the enclosing '{{' and '}}'.
    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])

  @staticmethod
  def parseVariable(varElem):
    """Creates a VarRef element from '[[name]]' or a VarDef element from
       '[[name:regex]]'."""
    colonPos = varElem.find(":")
    if colonPos == -1:
      # Variable reference
      name = varElem[2:-2]
      return CheckElement(CheckElement.Variant.VarRef, name, None)
    else:
      # Variable definition
      name = varElem[2:colonPos]
      body = varElem[colonPos+1:-2]
      return CheckElement(CheckElement.Variant.VarDef, name, body)


class CheckLine(CommonEqualityMixin):
  """Representation of a single assertion in the check file formed of one or
     more regex elements. Matching against an output line is successful only
     if all regex elements can be matched in the given order."""

  def __init__(self, lineContent, lineNo=-1):
    lineContent = lineContent.strip()

    self.lineNo = lineNo
    self.content = lineContent

    self.lineParts = self.__parse(lineContent)
    if not self.lineParts:
      raise Exception("Empty check line")

  # Returns True if the given Match object was at the beginning of the line.
  def __isMatchAtStart(self, match):
    return (match is not None) and (match.start() == 0)

  # Takes in a list of Match objects and returns the minimal start point among
  # them. If there aren't any successful matches it returns the length of
  # the searched string.
  def __firstMatch(self, matches, string):
    return min(len(string) if m is None else m.start() for m in matches)

  # Returns the regex for finding a regex pattern in the check line.
  def __getPatternRegex(self):
    rStartSym = r"\{\{"
    rEndSym = r"\}\}"
    rBody = ".+?"
    return rStartSym + rBody + rEndSym

  # Returns the regex for finding a variable use in the check line.
  def __getVariableRegex(self):
    rStartSym = r"\[\["
    rEndSym = r"\]\]"
    rStartOptional = "("
    rEndOptional = ")?"
    rName = "[a-zA-Z][a-zA-Z0-9]*"
    rSeparator = ":"
    rBody = ".+?"
    return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym

  # This method parses the content of a check line stripped of the initial
  # comment symbol and the CHECK keyword.
  def __parse(self, line):
    # The marker regexes do not change between iterations; build them once.
    whitespaceRegex = re.compile(r"\s+")
    patternRegex = re.compile(self.__getPatternRegex())
    variableRegex = re.compile(self.__getVariableRegex())

    lineParts = []
    # Loop as long as there is something to parse.
    while line:
      # Search for the nearest occurrence of the special markers.
      matchWhitespace = whitespaceRegex.search(line)
      matchPattern = patternRegex.search(line)
      matchVariable = variableRegex.search(line)

      # If one of the above was identified at the current position, extract them
      # from the line, parse them and add to the list of line parts.
      if self.__isMatchAtStart(matchWhitespace):
        # We want to be whitespace-agnostic so whenever a check line contains
        # a whitespace, we add a regex pattern for an arbitrary non-zero number
        # of whitespaces.
        line = line[matchWhitespace.end():]
        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
      elif self.__isMatchAtStart(matchPattern):
        pattern = line[0:matchPattern.end()]
        line = line[matchPattern.end():]
        lineParts.append(CheckElement.parsePattern(pattern))
      elif self.__isMatchAtStart(matchVariable):
        var = line[0:matchVariable.end()]
        line = line[matchVariable.end():]
        lineParts.append(CheckElement.parseVariable(var))
      else:
        # If we're not currently looking at a special marker, this is a plain
        # text match all the way until the first special marker (or the end
        # of the line).
        firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
        text = line[0:firstMatch]
        line = line[firstMatch:]
        lineParts.append(CheckElement.parseText(text))
    return lineParts

  # Returns the regex pattern to be matched in the output line. Variable
  # references are substituted with their current values provided in the
  # 'varState' argument.
  # An exception is raised if a referenced variable is undefined.
  def __generatePattern(self, linePart, varState):
    if linePart.variant == CheckElement.Variant.VarRef:
      try:
        return re.escape(varState[linePart.name])
      except KeyError:
        raise Exception("Use of undefined variable '" + linePart.name + "' " +
                        "(line " + str(self.lineNo) + ")")
    else:
      return linePart.pattern

  # Attempts to match the check line against a line from the output file with
  # the given initial variable values. It returns the new variable state if
  # successful and None otherwise.
  # Raises an exception if the check line references an undefined variable or
  # redefines an existing one.
  def match(self, outputLine, initialVarState):
    initialSearchFrom = 0
    initialPattern = self.__generatePattern(self.lineParts[0], initialVarState)
    # Stop once every start position (including the empty suffix at the very
    # end of the line) has been tried. Without this bound, a first element
    # that can match the empty string would keep "matching" past the end of
    # the line and the loop would never terminate.
    while initialSearchFrom <= len(outputLine):
      # Search for the first element on the regex parts list. This will mark
      # the point on the line from which we will attempt to match the rest of
      # the check pattern. If this iteration produces only a partial match,
      # the next iteration will start searching further in the output.
      firstMatch = re.search(initialPattern, outputLine[initialSearchFrom:])
      if firstMatch is None:
        return None
      matchStart = initialSearchFrom + firstMatch.start()
      initialSearchFrom += firstMatch.start() + 1

      # Do the full matching on a shadow copy of the variable state. If the
      # matching fails half-way, we will not need to revert the state.
      varState = dict(initialVarState)

      # Now try to parse all of the parts of the check line in the right order.
      # Variable values are updated on-the-fly, meaning that a variable can
      # be referenced immediately after its definition.
      fullyMatched = True
      for part in self.lineParts:
        pattern = self.__generatePattern(part, varState)
        match = re.match(pattern, outputLine[matchStart:])
        if match is None:
          fullyMatched = False
          break
        matchEnd = matchStart + match.end()
        if part.variant == CheckElement.Variant.VarDef:
          if part.name in varState:
            raise Exception("Redefinition of variable '" + part.name + "'" +
                            " (line " + str(self.lineNo) + ")")
          varState[part.name] = outputLine[matchStart:matchEnd]
        matchStart = matchEnd

      # Return the new variable state if all parts were successfully matched.
      # Otherwise loop and try to find another start point on the same line.
      if fullyMatched:
        return varState

    return None


class CheckGroup(CommonEqualityMixin):
  """Represents a named collection of check lines which are to be matched
     against an output group of the same name."""

  def __init__(self, name, lines):
    if name:
      self.name = name
    else:
      raise Exception("Check group does not have a name")
    if lines:
      self.lines = lines
    else:
      raise Exception("Check group " + self.name + " does not have a body")

  # The driver of matching inside a group. It simultaneously reads lines from
  # the output and check groups and attempts to match them against each other
  # in the correct order.
  # Raises an exception naming the first check line that could not be matched.
  def match(self, outputGroup):
    readOutputLines = 0
    lastMatch = 0
    varState = {}

    # A single iterator is shared by all check lines: each check line resumes
    # scanning the output right after the line matched by the previous one.
    outputLines = iter(outputGroup.body)

    for checkLine in self.lines:
      foundMatch = False

      for outputLine in outputLines:
        readOutputLines += 1

        # Try to match the current lines against each other. If successful,
        # save the new state of variables and continue to the next check line.
        newVarState = checkLine.match(outputLine, varState)
        if newVarState is not None:
          varState = newVarState
          lastMatch = readOutputLines
          foundMatch = True
          break
      if not foundMatch:
        raise Exception("Could not match check line \"" + checkLine.content + "\" from line " +
                        str(lastMatch+1) + " of the output. [vars=" + str(varState) + "]")

  @staticmethod
  def parse(name, lines):
    """Builds a check group from its name and the raw check line strings."""
    return CheckGroup(name, [CheckLine(line) for line in lines])


class OutputGroup(CommonEqualityMixin):
  """Represents a named part of the test output against which a check group of
     the same name is to be matched."""

  def __init__(self, name, body):
    if name:
      self.name = name
    else:
      raise Exception("Output group does not have a name")
    if body:
      self.body = body
    else:
      raise Exception("Output group " + self.name + " does not have a body")


class FileSplitMixin(object):
  """Mixin for representing text files which need to be split into smaller
     chunks before being parsed.

     Child classes provide '_processLine(line, lineNo)', '_processGroup(name,
     lines)' and '_exceptionLineOutsideGroup(line, lineNo)'."""

  def _parseStream(self, stream):
    allGroups = []
    currentGroup = None

    for lineNo, line in enumerate(stream, 1):
      line = line.strip()
      if not line:
        continue

      # Let the child class process the line and return information about it.
      # The _processLine method can modify the content of the line (or delete it
      # entirely) and specify whether it starts a new group.
      processedLine, newGroupName = self._processLine(line, lineNo)
      if newGroupName is not None:
        currentGroup = (newGroupName, [])
        allGroups.append(currentGroup)
      if processedLine is not None:
        if currentGroup is None:
          # A content line appeared before any group header. Let the child
          # class report it instead of failing on a None group.
          self._exceptionLineOutsideGroup(line, lineNo)
        else:
          currentGroup[1].append(processedLine)

    # Finally, take the generated line groups and let the child class process
    # each one before storing the final outcome.
    return [self._processGroup(name, lines) for name, lines in allGroups]


class CheckFile(FileSplitMixin):
  """Collection of check groups extracted from the input test file."""

  def __init__(self, prefix, checkStream):
    self.prefix = prefix
    self.groups = self._parseStream(checkStream)

  # Attempts to parse a check line. The regex searches for a comment symbol
  # followed by the CHECK keyword, given attribute and a colon at the very
  # beginning of the line. Whitespaces are ignored.
  def _extractLine(self, prefix, line):
    ignoreWhitespace = r"\s*"
    commentSymbols = ["//", "#"]
    # The prefix is user-supplied plain text, so escape it before it becomes
    # part of the regex.
    prefixRegex = ignoreWhitespace + \
                  "(" + "|".join(commentSymbols) + ")" + \
                  ignoreWhitespace + \
                  re.escape(prefix) + ":"

    # The 'match' function succeeds only if the pattern is matched at the
    # beginning of the line.
    match = re.match(prefixRegex, line)
    if match is not None:
      return line[match.end():].strip()
    else:
      return None

  def _processLine(self, line, lineNo):
    startLine = self._extractLine(self.prefix + "-START", line)
    if startLine is not None:
      # Line starts with the CHECK-START keyword, start a new group
      return (None, startLine)
    else:
      # Otherwise try to parse it as a standard CHECK line. If unsuccessful,
      # _extractLine will return None and the line will be ignored.
      return (self._extractLine(self.prefix, line), None)

  def _exceptionLineOutsideGroup(self, line, lineNo):
    raise Exception("Check file line lies outside a group (line " + str(lineNo) + ")")

  def _processGroup(self, name, lines):
    return CheckGroup.parse(name, lines)

  # Matches every check group against the output group of the same name.
  # Raises an exception on the first missing group or failed check line. If
  # 'printInfo' is set, a TEST/PASSED/FAILED line is printed per group.
  def match(self, outputFile, printInfo=False):
    for checkGroup in self.groups:
      # TODO: Currently does not handle multiple occurrences of the same group
      # name, e.g. when a pass is run multiple times. It will always try to
      # match a check group against the first output group of the same name.
      outputGroup = outputFile.findGroup(checkGroup.name)
      if outputGroup is None:
        raise Exception("Group " + checkGroup.name + " not found in the output")
      if printInfo:
        print("TEST " + checkGroup.name + "... ", end="", flush=True)
      try:
        checkGroup.match(outputGroup)
        if printInfo:
          print("PASSED")
      except Exception:
        if printInfo:
          print("FAILED!")
        # Re-raise the original exception with its traceback intact.
        raise


class OutputFile(FileSplitMixin):
  """Representation of the output generated by the test and split into groups
     within which the checks are performed.

     C1visualizer format is parsed with a state machine which differentiates
     between the 'compilation' and 'cfg' blocks. The former marks the beginning
     of a method. It is parsed for the method's name but otherwise ignored. Each
     subsequent CFG block represents one stage of the compilation pipeline and
     is parsed into an output group named "<method name> <pass name>".
     """

  class ParsingState:
    OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)

  def __init__(self, outputStream):
    # Initialize the state machine
    self.lastMethodName = None
    self.state = OutputFile.ParsingState.OutsideBlock
    self.groups = self._parseStream(outputStream)

  def _processLine(self, line, lineNo):
    if self.state == OutputFile.ParsingState.StartingCfgBlock:
      # Previous line started a new 'cfg' block which means that this one must
      # contain the name of the pass (this is enforced by C1visualizer).
      if re.match(r'name\s+"[^"]+"', line):
        # Extract the pass name, prepend it with the name of the method and
        # return as the beginning of a new group.
        self.state = OutputFile.ParsingState.InsideCfgBlock
        return (None, self.lastMethodName + " " + line.split("\"")[1])
      else:
        raise Exception("Expected group name in output file (line " + str(lineNo) + ")")

    elif self.state == OutputFile.ParsingState.InsideCfgBlock:
      if line == "end_cfg":
        self.state = OutputFile.ParsingState.OutsideBlock
        return (None, None)
      else:
        return (line, None)

    elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
      # Search for the method's name. Format: method "<name>"
      if re.match(r'method\s+"[^"]+"', line):
        self.lastMethodName = line.split("\"")[1]
      elif line == "end_compilation":
        self.state = OutputFile.ParsingState.OutsideBlock
      return (None, None)

    else:  # self.state == OutputFile.ParsingState.OutsideBlock:
      if line == "begin_cfg":
        # The line starts a new group but we'll wait until the next line from
        # which we can extract the name of the pass.
        if self.lastMethodName is None:
          raise Exception("Output contains a pass without a method header" +
                          " (line " + str(lineNo) + ")")
        self.state = OutputFile.ParsingState.StartingCfgBlock
        return (None, None)
      elif line == "begin_compilation":
        self.state = OutputFile.ParsingState.InsideCompilationBlock
        return (None, None)
      else:
        raise Exception("Output line lies outside a group (line " + str(lineNo) + ")")

  def _exceptionLineOutsideGroup(self, line, lineNo):
    raise Exception("Output line lies outside a group (line " + str(lineNo) + ")")

  def _processGroup(self, name, lines):
    return OutputGroup(name, lines)

  def findGroup(self, name):
    """Returns the first output group called 'name', or None if absent."""
    for group in self.groups:
      if group.name == name:
        return group
    return None


def ParseArguments():
  """Parses the command line and returns the argparse namespace."""
  parser = argparse.ArgumentParser()
  parser.add_argument("test_file", help="the source of the test with checking annotations")
  parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX",
                      help="prefix of checks in the test file (default: CHECK)")
  parser.add_argument("--list-groups", dest="list_groups", action="store_true",
                      help="print a list of all groups found in the test output")
  parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP",
                      help="print the contents of an output group")
  return parser.parse_args()


class cd:
  """Helper class which temporarily changes the working directory."""

  def __init__(self, newPath):
    self.newPath = newPath

  def __enter__(self):
    self.savedPath = os.getcwd()
    os.chdir(self.newPath)

  def __exit__(self, etype, value, traceback):
    os.chdir(self.savedPath)


def CompileTest(inputFile, tempFolder):
  """Compiles 'inputFile' with javac, dx and dex2oat inside 'tempFolder' and
     returns the path of the generated 'art.cfg' file.

     Requires the ANDROID_HOST_OUT environment variable (KeyError otherwise);
     a failing tool raises subprocess.CalledProcessError."""
  classFolder = tempFolder + "/classes"
  dexFile = tempFolder + "/test.dex"
  oatFile = tempFolder + "/test.oat"
  outputFile = tempFolder + "/art.cfg"
  os.makedirs(classFolder)

  # Build a DEX from the source file. We pass "--no-optimize" to dx to avoid
  # interference with its optimizations.
  check_call(["javac", "-d", classFolder, inputFile])
  check_call(["dx", "--dex", "--no-optimize", "--output=" + dexFile, classFolder])

  # Run dex2oat and export the HGraph. The output is stored into ${PWD}/art.cfg.
  with cd(tempFolder):
    hostOut = os.environ["ANDROID_HOST_OUT"]
    check_call(["dex2oat", "-j1", "--dump-passes", "--compiler-backend=Optimizing",
                "--android-root=" + hostOut,
                "--boot-image=" + hostOut + "/framework/core-optimizing.art",
                "--runtime-arg", "-Xnorelocate", "--dex-file=" + dexFile, "--oat-file=" + oatFile])

  return outputFile


def ListGroups(outputFilename):
  """Prints the names of all groups found in the given output file."""
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  for group in outputFile.groups:
    print(group.name)


def DumpGroup(outputFilename, groupName):
  """Prints the body of output group 'groupName'; raises if it is missing."""
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  group = outputFile.findGroup(groupName)
  if group:
    print("\n".join(group.body))
  else:
    raise Exception("Check group " + groupName + " not found in the output")


def RunChecks(checkPrefix, checkFilename, outputFilename):
  """Parses the check file and the output file and matches them, printing
     progress. Raises an exception on the first failed check."""
  # Both files are parsed fully on construction, so the handles can be closed
  # before the matching starts.
  with open(checkFilename, "r") as checkStream:
    checkFile = CheckFile(checkPrefix, checkStream)
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  checkFile.match(outputFile, True)


if __name__ == "__main__":
  args = ParseArguments()
  tempFolder = tempfile.mkdtemp()

  try:
    outputFile = CompileTest(args.test_file, tempFolder)
    if args.list_groups:
      ListGroups(outputFile)
    elif args.dump_group:
      DumpGroup(outputFile, args.dump_group)
    else:
      RunChecks(args.check_prefix, args.test_file, outputFile)
  finally:
    # Always remove the temporary build folder, even if a step failed.
    shutil.rmtree(tempFolder)