ART: Split Checker into smaller files

Checker code has become too messy and incomprehensible. This patch
splits it into more manageable and better structured units.
Functionality remains unchanged.

Resubmission of change I870c69827d2be2d09196a51382a3f47f31cd2ba3 due
to omission of file 'tools/checker/file_format/common.py'.

Change-Id: I277a4aa65a2e3b54f0e89901fdb9f289f55a325f
diff --git a/tools/checker/file_format/__init__.py b/tools/checker/file_format/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/c1visualizer/__init__.py b/tools/checker/file_format/c1visualizer/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/c1visualizer/parser.py b/tools/checker/file_format/c1visualizer/parser.py
new file mode 100644
index 0000000..335a195
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/parser.py
@@ -0,0 +1,87 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger                   import Logger
+from file_format.common              import SplitStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+
+import re
+
+class C1ParserState:
+  """ Current position of the C1visualizer parser and the last method name seen. """
+  OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)
+  def __init__(self):
+    self.currentState = C1ParserState.OutsideBlock
+    self.lastMethodName = None
+
+def __parseC1Line(line, lineNo, state, fileName):
+  """ Classifies one line of C1visualizer output and advances `state`. Returns
+      a pair (text, groupName): `text` is the line itself if it belongs to the
+      body of the current 'cfg' block (None otherwise) and `groupName` is
+      "<method name> <pass name>" if the line names a new output group (None
+      otherwise). Malformed input is reported through Logger.fail.
+  """
+  if state.currentState == C1ParserState.StartingCfgBlock:
+    # Previous line started a new 'cfg' block which means that this one must
+    # contain the name of the pass (this is enforced by C1visualizer).
+    if re.match(r'name\s+"[^"]+"', line):
+      # Extract the pass name, prepend it with the name of the method and
+      # return as the beginning of a new group.
+      state.currentState = C1ParserState.InsideCfgBlock
+      return (None, state.lastMethodName + " " + line.split("\"")[1])
+    else:
+      Logger.fail("Expected output group name", fileName, lineNo)
+
+  elif state.currentState == C1ParserState.InsideCfgBlock:
+    if line == "end_cfg":
+      state.currentState = C1ParserState.OutsideBlock
+      return (None, None)
+    else:
+      return (line, None)
+
+  elif state.currentState == C1ParserState.InsideCompilationBlock:
+    # Search for the method's name. Format: method "<name>"
+    if re.match(r'method\s+"[^"]*"', line):
+      methodName = line.split("\"")[1].strip()
+      if not methodName:
+        Logger.fail("Empty method name in output", fileName, lineNo)
+      state.lastMethodName = methodName
+    elif line == "end_compilation":
+      state.currentState = C1ParserState.OutsideBlock
+    return (None, None)
+
+  else:
+    assert state.currentState == C1ParserState.OutsideBlock
+    if line == "begin_cfg":
+      # The line starts a new group but we'll wait until the next line from
+      # which we can extract the name of the pass.
+      if state.lastMethodName is None:
+        Logger.fail("Expected method header", fileName, lineNo)
+      state.currentState = C1ParserState.StartingCfgBlock
+      return (None, None)
+    elif line == "begin_compilation":
+      state.currentState = C1ParserState.InsideCompilationBlock
+      return (None, None)
+    else:
+      Logger.fail("C1visualizer line not inside a group", fileName, lineNo)
+
+def ParseC1visualizerStream(fileName, stream):
+  """ Parses `stream` into a C1visualizerFile with one pass per named 'cfg' block. """
+  parsedFile = C1visualizerFile(fileName)
+  parserState = C1ParserState()
+  onLine = lambda text, num: __parseC1Line(text, num, parserState, fileName)
+  onStray = lambda text, num: Logger.fail("C1visualizer line not inside a group", fileName, num)
+  for name, body, firstLineNo in SplitStream(stream, onLine, onStray):
+    C1visualizerPass(parsedFile, name, body, firstLineNo + 1)
+  return parsedFile
diff --git a/tools/checker/file_format/c1visualizer/struct.py b/tools/checker/file_format/c1visualizer/struct.py
new file mode 100644
index 0000000..991564e
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/struct.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from common.mixins import PrintableMixin
+
+class C1visualizerFile(PrintableMixin):
+  """ Parsed C1visualizer output file: a file name and its list of passes. """
+
+  def __init__(self, fileName):
+    self.fileName = fileName
+    self.passes = []
+
+  def addPass(self, new_pass):
+    self.passes.append(new_pass)
+
+  def findPass(self, name):
+    """ Returns the first pass called `name`, or None if there is none. """
+    return next((p for p in self.passes if p.name == name), None)
+
+  def __eq__(self, other):
+    # Only the passes are compared; fileName does not take part.
+    sameType = isinstance(other, self.__class__)
+    return sameType and self.passes == other.passes
+
+
+class C1visualizerPass(PrintableMixin):
+  """ One named pass of a C1visualizerFile together with its body lines. """
+
+  def __init__(self, parent, name, body, startLineNo):
+    self.parent = parent
+    self.name = name
+    self.body = body
+    self.startLineNo = startLineNo
+
+    if not name:
+      Logger.fail("C1visualizer pass does not have a name", parent.fileName, startLineNo)
+    if not body:
+      Logger.fail("C1visualizer pass does not have a body", parent.fileName, startLineNo)
+
+    parent.addPass(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def __eq__(self, other):
+    sameType = isinstance(other, self.__class__)
+    return sameType and self.name == other.name and self.body == other.body
diff --git a/tools/checker/file_format/c1visualizer/test.py b/tools/checker/file_format/c1visualizer/test.py
new file mode 100644
index 0000000..812a4cf
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/test.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing                  import ToUnicode
+from file_format.c1visualizer.parser import ParseC1visualizerStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+
+import io
+import unittest
+
+class C1visualizerParser_Test(unittest.TestCase):
+  """ Checks the structure that ParseC1visualizerStream builds from sample output. """
+  def createFile(self, passList):
+    """ Creates an instance of C1visualizerFile from provided info.
+
+    Data format: [ ( <pass-name>, [ <body-line>, ... ] ), ... ]
+    """
+    c1File = C1visualizerFile("<c1_file>")
+    for passEntry in passList:
+      passName = passEntry[0]
+      passBody = passEntry[1]
+      c1Pass = C1visualizerPass(c1File, passName, passBody, 0)
+    return c1File
+
+  def assertParsesTo(self, c1Text, expectedData):
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseC1visualizerStream("<c1_file>", io.StringIO(ToUnicode(c1Text)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          method "MyMethod"
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+      """,
+      [ ( "MyMethod pass1", [ "foo", "bar" ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod1 pass2", [ "abc", "def" ] ) ])
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_compilation
+          name "xyz2"
+          method "MyMethod2"
+          date 5678
+        end_compilation
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod2 pass2", [ "abc", "def" ] ) ])
diff --git a/tools/checker/file_format/checker/__init__.py b/tools/checker/file_format/checker/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/checker/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
new file mode 100644
index 0000000..d7a38da
--- /dev/null
+++ b/tools/checker/file_format/checker/parser.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger              import Logger
+from file_format.common         import SplitStream
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression
+import re
+
+def __extractLine(prefix, line):
+  """ Returns the text following '<comment symbol> <prefix>:' if `line` starts
+      with that marker, or None otherwise. Recognized comment symbols are
+      '//' and '#'. Whitespace before the symbol, between the symbol and the
+      prefix, and around the returned text is ignored. Note that `prefix` is
+      inserted into the regex verbatim, i.e. it is not escaped.
+  """
+  commentSymbols = r"(" + r"|".join([r"//", r"#"]) + r")"
+  headerRegex = r"\s*" + commentSymbols + r"\s*" + prefix + r":"
+
+  # re.match anchors the pattern at the beginning of the line, so a marker
+  # preceded by any other text is not recognized.
+  header = re.match(headerRegex, line)
+  if header is None:
+    return None
+
+  # Everything after the colon, without surrounding whitespace, is the
+  # content of the check line.
+  return line[header.end():].strip()
+
+def __processLine(line, lineNo, prefix):
+  """ Classifies one line of the check file and returns a pair (data, name):
+      - (None, <name>) for a '<prefix>-START' line, which starts a test case;
+      - ((<text>, <variant>, lineNo), None) for an assertion line, where the
+        variant is InOrder for '<prefix>', DAG for '<prefix>-DAG' and Not for
+        '<prefix>-NOT';
+      - (None, None) for any other line.
+  """
+  # Lines beginning with '<prefix>-START' start a new test case.
+  caseName = __extractLine(prefix + "-START", line)
+  if caseName is not None:
+    return None, caseName
+
+  # Assertion markers in the order they are tried: plain lines are matched
+  # in order, '-DAG' lines are no-order assertions and '-NOT' lines are
+  # no-order negative assertions.
+  assertionKinds = [
+    ("",     TestAssertion.Variant.InOrder),
+    ("-DAG", TestAssertion.Variant.DAG),
+    ("-NOT", TestAssertion.Variant.Not),
+  ]
+
+  for suffix, variant in assertionKinds:
+    text = __extractLine(prefix + suffix, line)
+    if text is not None:
+      return (text, variant, lineNo), None
+
+  # Other lines are ignored.
+  return None, None
+
+def __isMatchAtStart(match):
+  """ Returns True iff `match` is a successful Match that begins at offset 0. """
+  return match is not None and match.start() == 0
+
+def __firstMatch(matches, string):
+  """ Takes in a list of Match objects (None for unsuccessful searches) and
+      returns the minimal start point among them. If there aren't any
+      successful matches, or the list is empty, it returns len(string).
+  """
+  starts = [ len(string) if m is None else m.start() for m in matches ]
+  return min(starts) if starts else len(string)
+
+def ParseCheckerAssertion(parent, line, variant, lineNo):
+  """ Parses the content of a check line (already stripped of the comment
+      symbol and the CHECK keyword) into a TestAssertion attached to `parent`.
+  """
+  assertion = TestAssertion(parent, variant, line, lineNo)
+  # Loop as long as there is something to parse.
+  while line:
+    # Search for the nearest occurrence of the special markers.
+    matchWhitespace = re.search(r"\s+", line)
+    matchPattern = re.search(RegexExpression.Regex.regexPattern, line)
+    matchVariableReference = re.search(RegexExpression.Regex.regexVariableReference, line)
+    matchVariableDefinition = re.search(RegexExpression.Regex.regexVariableDefinition, line)
+
+    # If a marker sits at the current position, cut it off the line and add it
+    # as an expression; the [2:-2] slices drop the two-character delimiters.
+    if __isMatchAtStart(matchWhitespace):
+      # A whitespace in the check line creates a new separator of line parts.
+      # This allows for ignored output between the previous and next parts.
+      line = line[matchWhitespace.end():]
+      assertion.addExpression(RegexExpression.createSeparator())
+    elif __isMatchAtStart(matchPattern):
+      pattern = line[0:matchPattern.end()]
+      pattern = pattern[2:-2]
+      line = line[matchPattern.end():]
+      assertion.addExpression(RegexExpression.createPattern(pattern))
+    elif __isMatchAtStart(matchVariableReference):
+      var = line[0:matchVariableReference.end()]
+      line = line[matchVariableReference.end():]
+      name = var[2:-2]
+      assertion.addExpression(RegexExpression.createVariableReference(name))
+    elif __isMatchAtStart(matchVariableDefinition):
+      var = line[0:matchVariableDefinition.end()]
+      line = line[matchVariableDefinition.end():]
+      colonPos = var.find(":")
+      name = var[2:colonPos]
+      body = var[colonPos+1:-2]
+      assertion.addExpression(RegexExpression.createVariableDefinition(name, body))
+    else:
+      # If we're not currently looking at a special marker, this is a plain
+      # text match all the way until the first special marker (or the end
+      # of the line).
+      firstMatch = __firstMatch([ matchWhitespace,
+                                  matchPattern,
+                                  matchVariableReference,
+                                  matchVariableDefinition ],
+                                line)
+      text = line[0:firstMatch]
+      line = line[firstMatch:]
+      assertion.addExpression(RegexExpression.createText(text))
+  return assertion
+
+def ParseCheckerStream(fileName, prefix, stream):
+  """ Parses `stream` into a CheckerFile, recognizing check lines by `prefix`. """
+  checkerFile = CheckerFile(fileName)
+  fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix)
+  fnLineOutsideChunk = lambda line, lineNo: Logger.fail("Checker line not inside a group", fileName, lineNo)
+  for caseName, caseLines, startLineNo in SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+    testCase = TestCase(checkerFile, caseName, startLineNo)
+    for caseLine in caseLines:
+      ParseCheckerAssertion(testCase, caseLine[0], caseLine[1], caseLine[2])
+  return checkerFile
diff --git a/tools/checker/file_format/checker/struct.py b/tools/checker/file_format/checker/struct.py
new file mode 100644
index 0000000..3354cb6
--- /dev/null
+++ b/tools/checker/file_format/checker/struct.py
@@ -0,0 +1,156 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from common.mixins import EqualityMixin, PrintableMixin
+
+import re
+
+class CheckerFile(PrintableMixin):
+  """ Parsed checker file: a file name and the test cases found in it. """
+
+  def __init__(self, fileName):
+    self.fileName = fileName
+    self.testCases = []
+
+  def addTestCase(self, new_test_case):
+    self.testCases.append(new_test_case)
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) and self.testCases == other.testCases
+
+
+class TestCase(PrintableMixin):
+  """ Named group of assertions belonging to a CheckerFile. """
+
+  def __init__(self, parent, name, startLineNo):
+    assert isinstance(parent, CheckerFile)
+
+    self.parent = parent
+    self.name = name
+    self.assertions = []
+    self.startLineNo = startLineNo
+
+    if not name:
+      Logger.fail("Test case does not have a name", parent.fileName, startLineNo)
+
+    parent.addTestCase(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def addAssertion(self, new_assertion):
+    self.assertions.append(new_assertion)
+
+  def __eq__(self, other):
+    sameType = isinstance(other, self.__class__)
+    return sameType and self.name == other.name and self.assertions == other.assertions
+
+
+class TestAssertion(PrintableMixin):
+  """ Single check line of a TestCase, held as a list of RegexExpressions. """
+
+  class Variant(object):
+    """Supported types of assertions."""
+    InOrder, DAG, Not = range(3)
+
+  def __init__(self, parent, variant, originalText, lineNo):
+    assert isinstance(parent, TestCase)
+
+    self.parent = parent
+    self.variant = variant
+    self.expressions = []
+    self.lineNo = lineNo
+    self.originalText = originalText
+
+    parent.addAssertion(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def addExpression(self, new_expression):
+    assert isinstance(new_expression, RegexExpression)
+    # A CHECK-NOT line defining a variable is reported through Logger.fail.
+    isNot = self.variant == TestAssertion.Variant.Not
+    if isNot and new_expression.variant == RegexExpression.Variant.VarDef:
+      Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo)
+    self.expressions.append(new_expression)
+
+  def toRegex(self):
+    """ Returns a regex pattern for this entire assertion. Only used in tests. """
+    parts = []
+    for expression in self.expressions:
+      isSeparator = expression.variant == RegexExpression.Variant.Separator
+      parts.append(", " if isSeparator else "(" + expression.pattern + ")")
+    return "".join(parts)
+
+  def __eq__(self, other):
+    sameType = isinstance(other, self.__class__)
+    return sameType and self.variant == other.variant \
+                    and self.expressions == other.expressions
+
+
+class RegexExpression(EqualityMixin, PrintableMixin):
+  """ One element of a parsed assertion: text, pattern, variable or separator. """
+
+  class Variant(object):
+    """Supported language constructs."""
+    Text, Pattern, VarRef, VarDef, Separator = range(5)
+
+  class Regex(object):
+    """ Building blocks of the syntax: {{pattern}}, [[Name]] and [[Name:pattern]]. """
+    rName = r"([a-zA-Z][a-zA-Z0-9]*)"
+    rRegex = r"(.+?)"
+    rPatternStartSym = r"(\{\{)"
+    rPatternEndSym = r"(\}\})"
+    rVariableStartSym = r"(\[\[)"
+    rVariableEndSym = r"(\]\])"
+    rVariableSeparator = r"(:)"
+
+    regexPattern = rPatternStartSym + rRegex + rPatternEndSym
+    regexVariableReference = rVariableStartSym + rName + rVariableEndSym
+    regexVariableDefinition = rVariableStartSym + rName + rVariableSeparator + rRegex + rVariableEndSym
+
+  def __init__(self, variant, name, pattern):
+    self.variant = variant
+    self.name = name
+    self.pattern = pattern
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and (self.variant, self.name, self.pattern) == (other.variant, other.name, other.pattern)
+
+  @staticmethod
+  def createSeparator():
+    return RegexExpression(RegexExpression.Variant.Separator, None, None)
+
+  @staticmethod
+  def createText(text):
+    return RegexExpression(RegexExpression.Variant.Text, None, re.escape(text))
+
+  @staticmethod
+  def createPattern(pattern):
+    return RegexExpression(RegexExpression.Variant.Pattern, None, pattern)
+
+  @staticmethod
+  def createVariableReference(name):
+    assert re.match(RegexExpression.Regex.rName, name)
+    return RegexExpression(RegexExpression.Variant.VarRef, name, None)
+
+  @staticmethod
+  def createVariableDefinition(name, pattern):
+    assert re.match(RegexExpression.Regex.rName, name)
+    return RegexExpression(RegexExpression.Variant.VarDef, name, pattern)
diff --git a/tools/checker/file_format/checker/test.py b/tools/checker/file_format/checker/test.py
new file mode 100644
index 0000000..167c888
--- /dev/null
+++ b/tools/checker/file_format/checker/test.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing             import ToUnicode
+from file_format.checker.parser import ParseCheckerStream
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression
+
+import io
+import unittest
+
+CheckerException = SystemExit
+
+class CheckerParser_PrefixTest(unittest.TestCase):
+  """ Tests which line beginnings are recognized as CHECK assertion lines. """
+  def tryParse(self, string):
+    checkerText = u"// CHECK-START: pass\n" + ToUnicode(string)
+    checkFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText))
+    self.assertEqual(len(checkFile.testCases), 1)
+    testCase = checkFile.testCases[0]
+    return len(testCase.assertions) != 0
+
+  def test_InvalidFormat(self):
+    self.assertFalse(self.tryParse("CHECK"))
+    self.assertFalse(self.tryParse(":CHECK"))
+    self.assertFalse(self.tryParse("CHECK:"))
+    self.assertFalse(self.tryParse("//CHECK"))
+    self.assertFalse(self.tryParse("#CHECK"))
+
+    self.assertTrue(self.tryParse("//CHECK:foo"))
+    self.assertTrue(self.tryParse("#CHECK:bar"))
+
+  def test_InvalidLabel(self):
+    self.assertFalse(self.tryParse("//ACHECK:foo"))
+    self.assertFalse(self.tryParse("#ACHECK:foo"))
+
+  def test_NotFirstOnTheLine(self):
+    self.assertFalse(self.tryParse("A// CHECK: foo"))
+    self.assertFalse(self.tryParse("A # CHECK: foo"))
+    self.assertFalse(self.tryParse("// // CHECK: foo"))
+    self.assertFalse(self.tryParse("# # CHECK: foo"))
+
+  def test_WhitespaceAgnostic(self):
+    self.assertTrue(self.tryParse("  //CHECK: foo"))
+    self.assertTrue(self.tryParse("//  CHECK: foo"))
+    self.assertTrue(self.tryParse("    //CHECK: foo"))
+    self.assertTrue(self.tryParse("//    CHECK: foo"))
+
+
+class CheckerParser_RegexExpressionTest(unittest.TestCase):
+  """ Tests how the content of a CHECK line is split into RegexExpressions. """
+  def parseAssertion(self, string, variant=""):
+    checkerText = u"// CHECK-START: pass\n// CHECK" + ToUnicode(variant) + u": " + ToUnicode(string)
+    checkerFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText))
+    self.assertEqual(len(checkerFile.testCases), 1)
+    testCase = checkerFile.testCases[0]
+    self.assertEqual(len(testCase.assertions), 1)
+    return testCase.assertions[0]
+
+  def parseExpression(self, string):
+    line = self.parseAssertion(string)
+    self.assertEqual(1, len(line.expressions))
+    return line.expressions[0]
+
+  def assertEqualsRegex(self, string, expected):
+    self.assertEqual(expected, self.parseAssertion(string).toRegex())
+
+  def assertEqualsText(self, string, text):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createText(text))
+
+  def assertEqualsPattern(self, string, pattern):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createPattern(pattern))
+
+  def assertEqualsVarRef(self, string, name):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createVariableReference(name))
+
+  def assertEqualsVarDef(self, string, name, pattern):
+    self.assertEqual(self.parseExpression(string),
+                     RegexExpression.createVariableDefinition(name, pattern))
+
+  def assertVariantNotEqual(self, string, variant):
+    self.assertNotEqual(variant, self.parseExpression(string).variant)
+
+  # Test that individual parts of the line are recognized
+
+  def test_TextOnly(self):
+    self.assertEqualsText("foo", "foo")
+    self.assertEqualsText("  foo  ", "foo")
+    self.assertEqualsRegex("f$o^o", r"(f\$o\^o)")
+
+  def test_PatternOnly(self):
+    self.assertEqualsPattern("{{a?b.c}}", "a?b.c")
+
+  def test_VarRefOnly(self):
+    self.assertEqualsVarRef("[[ABC]]", "ABC")
+
+  def test_VarDefOnly(self):
+    self.assertEqualsVarDef("[[ABC:a?b.c]]", "ABC", "a?b.c")
+
+  def test_TextWithWhitespace(self):
+    self.assertEqualsRegex("foo bar", "(foo), (bar)")
+    self.assertEqualsRegex("foo   bar", "(foo), (bar)")
+
+  def test_TextWithRegex(self):
+    self.assertEqualsRegex("foo{{abc}}bar", "(foo)(abc)(bar)")
+
+  def test_TextWithVar(self):
+    self.assertEqualsRegex("foo[[ABC:abc]]bar", "(foo)(abc)(bar)")
+
+  def test_PlainWithRegexAndWhitespaces(self):
+    self.assertEqualsRegex("foo {{abc}}bar", "(foo), (abc)(bar)")
+    self.assertEqualsRegex("foo{{abc}} bar", "(foo)(abc), (bar)")
+    self.assertEqualsRegex("foo {{abc}} bar", "(foo), (abc), (bar)")
+
+  def test_PlainWithVarAndWhitespaces(self):
+    self.assertEqualsRegex("foo [[ABC:abc]]bar", "(foo), (abc)(bar)")
+    self.assertEqualsRegex("foo[[ABC:abc]] bar", "(foo)(abc), (bar)")
+    self.assertEqualsRegex("foo [[ABC:abc]] bar", "(foo), (abc), (bar)")
+
+  def test_AllKinds(self):
+    self.assertEqualsRegex("foo [[ABC:abc]]{{def}}bar", "(foo), (abc)(def)(bar)")
+    self.assertEqualsRegex("foo[[ABC:abc]] {{def}}bar", "(foo)(abc), (def)(bar)")
+    self.assertEqualsRegex("foo [[ABC:abc]] {{def}} bar", "(foo), (abc), (def), (bar)")
+
+  # Test that variables and patterns are parsed correctly
+
+  def test_ValidPattern(self):
+    self.assertEqualsPattern("{{abc}}", "abc")
+    self.assertEqualsPattern("{{a[b]c}}", "a[b]c")
+    self.assertEqualsPattern("{{(a{bc})}}", "(a{bc})")
+
+  def test_ValidRef(self):
+    self.assertEqualsVarRef("[[ABC]]", "ABC")
+    self.assertEqualsVarRef("[[A1BC2]]", "A1BC2")
+
+  def test_ValidDef(self):
+    self.assertEqualsVarDef("[[ABC:abc]]", "ABC", "abc")
+    self.assertEqualsVarDef("[[ABC:ab:c]]", "ABC", "ab:c")
+    self.assertEqualsVarDef("[[ABC:a[b]c]]", "ABC", "a[b]c")
+    self.assertEqualsVarDef("[[ABC:(a[bc])]]", "ABC", "(a[bc])")
+
+  def test_Empty(self):
+    self.assertVariantNotEqual("{{}}", RegexExpression.Variant.Pattern)
+    self.assertVariantNotEqual("[[]]", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("[[:]]", RegexExpression.Variant.VarDef)
+
+  def test_InvalidVarName(self):
+    self.assertVariantNotEqual("[[0ABC]]", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("[[AB=C]]", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("[[ABC=]]", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("[[0ABC:abc]]", RegexExpression.Variant.VarDef)
+    self.assertVariantNotEqual("[[AB=C:abc]]", RegexExpression.Variant.VarDef)
+    self.assertVariantNotEqual("[[ABC=:abc]]", RegexExpression.Variant.VarDef)
+
+  def test_BodyMatchNotGreedy(self):
+    self.assertEqualsRegex("{{abc}}{{def}}", "(abc)(def)")
+    self.assertEqualsRegex("[[ABC:abc]][[DEF:def]]", "(abc)(def)")
+
+  def test_NoVarDefsInNotChecks(self):
+    with self.assertRaises(CheckerException):
+      self.parseAssertion("[[ABC:abc]]", "-NOT")
+
+
+class CheckerParser_FileLayoutTest(unittest.TestCase):
+  """ Tests how check lines are grouped into test cases and assertions. """
+  # Creates an instance of CheckerFile from provided info.
+  # Data format: [ ( <case-name>, [ ( <text>, <assert-variant> ), ... ] ), ... ]
+  def createFile(self, caseList):
+    testFile = CheckerFile("<test_file>")
+    for caseEntry in caseList:
+      caseName = caseEntry[0]
+      testCase = TestCase(testFile, caseName, 0)
+      assertionList = caseEntry[1]
+      for assertionEntry in assertionList:
+        content = assertionEntry[0]
+        variant = assertionEntry[1]
+        assertion = TestAssertion(testCase, variant, content, 0)
+        assertion.addExpression(RegexExpression.createText(content))
+    return testFile
+
+  def assertParsesTo(self, checkerText, expectedData):
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseCheckerStream("<test_file>", "CHECK", io.StringIO(ToUnicode(checkerText)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK:  foo
+        // CHECK:    bar
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group1
+        // CHECK: foo
+        // CHECK: bar
+        // CHECK-START: Example Group2
+        // CHECK: abc
+        // CHECK: def
+      """,
+      [ ( "Example Group1", [ ("foo", TestAssertion.Variant.InOrder),
+                              ("bar", TestAssertion.Variant.InOrder) ] ),
+        ( "Example Group2", [ ("abc", TestAssertion.Variant.InOrder),
+                              ("def", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_AssertionVariants(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK:     foo
+        // CHECK-NOT: bar
+        // CHECK-DAG: abc
+        // CHECK-DAG: def
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.Not),
+                             ("abc", TestAssertion.Variant.DAG),
+                             ("def", TestAssertion.Variant.DAG) ] ) ])
diff --git a/tools/checker/file_format/common.py b/tools/checker/file_format/common.py
new file mode 100644
index 0000000..f91fdeb
--- /dev/null
+++ b/tools/checker/file_format/common.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+  """ Reads the given input stream and splits it into chunks based on
+      information extracted from individual lines. Lines are stripped and
+      empty lines are skipped. Returns a list of (name, data, startLineNo)
+      tuples, one per chunk, where line numbers are 1-based.
+
+  Arguments:
+   - fnProcessLine: Called on each non-empty line with the stripped text and
+     line number. Must return a pair: data extracted from this line and name
+     of the chunk started on this line (or None in either position).
+   - fnLineOutsideChunk: Called with the text and line number on attempt to
+     attach data prior to creating a chunk.
+  """
+  chunks = []
+  openChunk = None
+
+  for lineNo, rawLine in enumerate(stream, 1):
+    line = rawLine.strip()
+    if not line:
+      continue
+
+    # The callback decides what (if anything) of the line is kept and
+    # whether the line starts a new chunk.
+    data, chunkName = fnProcessLine(line, lineNo)
+    if chunkName is not None:
+      openChunk = (chunkName, [], lineNo)
+      chunks.append(openChunk)
+    if data is None:
+      continue
+    if openChunk is None:
+      fnLineOutsideChunk(line, lineNo)
+    else:
+      openChunk[1].append(data)
+  return chunks