ART: Refactored regex code in Checker

This patch refactors how Checker constructs the regular expressions it uses to parse tests and verify compiler output. It also replaces all occurrences of the '[0-9]+' ID-matching expression with the '\d+' shorthand.

Change-Id: I5f854a25707e44ed2fa1673ff084990e8f43e4a2

commit: be0cc08fb4faed1ab69361fcd030af65f9020393 [log] [tgz]
author: David Brazdil <dbrazdil@google.com> Wed Dec 31 11:49:30 2014 +0000
committer: David Brazdil <dbrazdil@google.com> Thu Jan 08 21:57:34 2015 +0000
tree: 493c048e498435afea9de491b5a2ae0c5876a4eb
parent: 7e1a34386368d2bb3dc89bf5aa0519cafc326095 [diff] [blame]
diff --git a/tools/checker.py b/tools/checker.py
index 5e910ec..b1abea2 100755
--- a/tools/checker.py
+++ b/tools/checker.py

@@ -159,6 +159,23 @@
     """Supported language constructs."""
     Text, Pattern, VarRef, VarDef = range(4)
 
+  rStartOptional = r"("
+  rEndOptional = r")?"
+
+  rName = r"([a-zA-Z][a-zA-Z0-9]*)"
+  rRegex = r"(.+?)"
+  rPatternStartSym = r"(\{\{)"
+  rPatternEndSym = r"(\}\})"
+  rVariableStartSym = r"(\[\[)"
+  rVariableEndSym = r"(\]\])"
+  rVariableSeparator = r"(:)"
+
+  regexPattern = rPatternStartSym + rRegex + rPatternEndSym
+  regexVariable = rVariableStartSym + \
+                    rName + \
+                    (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \
+                  rVariableEndSym
+
   def __init__(self, variant, name, pattern):
     self.variant = variant
     self.name = name
@@ -170,22 +187,21 @@
 
   @staticmethod
   def parsePattern(patternElem):
-    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:len(patternElem)-2])
+    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])
 
   @staticmethod
   def parseVariable(varElem):
     colonPos = varElem.find(":")
     if colonPos == -1:
       # Variable reference
-      name = varElem[2:len(varElem)-2]
+      name = varElem[2:-2]
       return CheckElement(CheckElement.Variant.VarRef, name, None)
     else:
       # Variable definition
       name = varElem[2:colonPos]
-      body = varElem[colonPos+1:len(varElem)-2]
+      body = varElem[colonPos+1:-2]
       return CheckElement(CheckElement.Variant.VarDef, name, body)
 
-
 class CheckLine(CommonEqualityMixin):
   """Representation of a single assertion in the check file formed of one or
      more regex elements. Matching against an output line is successful only
@@ -226,24 +242,6 @@
     starts = map(lambda m: len(string) if m is None else m.start(), matches)
     return min(starts)
 
-  # Returns the regex for finding a regex pattern in the check line.
-  def __getPatternRegex(self):
-    rStartSym = "\{\{"
-    rEndSym = "\}\}"
-    rBody = ".+?"
-    return rStartSym + rBody + rEndSym
-
-  # Returns the regex for finding a variable use in the check line.
-  def __getVariableRegex(self):
-    rStartSym = "\[\["
-    rEndSym = "\]\]"
-    rStartOptional = "("
-    rEndOptional = ")?"
-    rName = "[a-zA-Z][a-zA-Z0-9]*"
-    rSeparator = ":"
-    rBody = ".+?"
-    return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym
-
   # This method parses the content of a check line stripped of the initial
   # comment symbol and the CHECK keyword.
   def __parse(self, line):
@@ -251,9 +249,9 @@
     # Loop as long as there is something to parse.
     while line:
       # Search for the nearest occurrence of the special markers.
-      matchWhitespace = re.search("\s+", line)
-      matchPattern = re.search(self.__getPatternRegex(), line)
-      matchVariable = re.search(self.__getVariableRegex(), line)
+      matchWhitespace = re.search(r"\s+", line)
+      matchPattern = re.search(CheckElement.regexPattern, line)
+      matchVariable = re.search(CheckElement.regexVariable, line)
 
       # If one of the above was identified at the current position, extract them
       # from the line, parse them and add to the list of line parts.
@@ -262,7 +260,7 @@
         # a whitespace, we add a regex pattern for an arbitrary non-zero number
         # of whitespaces.
         line = line[matchWhitespace.end():]
-        lineParts.append(CheckElement.parsePattern("{{\s+}}"))
+        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
       elif self.__isMatchAtStart(matchPattern):
         pattern = line[0:matchPattern.end()]
         line = line[matchPattern.end():]
@@ -536,16 +534,16 @@
   # followed by the CHECK keyword, given attribute and a colon at the very
   # beginning of the line. Whitespaces are ignored.
   def _extractLine(self, prefix, line):
-    ignoreWhitespace = "\s*"
-    commentSymbols = ["//", "#"]
-    prefixRegex = ignoreWhitespace + \
-                  "(" + "|".join(commentSymbols) + ")" + \
-                  ignoreWhitespace + \
-                  prefix + ":"
+    rIgnoreWhitespace = r"\s*"
+    rCommentSymbols = [r"//", r"#"]
+    regexPrefix = rIgnoreWhitespace + \
+                  r"(" + r"|".join(rCommentSymbols) + r")" + \
+                  rIgnoreWhitespace + \
+                  prefix + r":"
 
     # The 'match' function succeeds only if the pattern is matched at the
     # beginning of the line.
-    match = re.match(prefixRegex, line)
+    match = re.match(regexPrefix, line)
     if match is not None:
       return line[match.end():].strip()
     else:
commit	be0cc08fb4faed1ab69361fcd030af65f9020393	[log] [tgz]
author	David Brazdil <dbrazdil@google.com>	Wed Dec 31 11:49:30 2014 +0000
committer	David Brazdil <dbrazdil@google.com>	Thu Jan 08 21:57:34 2015 +0000
tree	493c048e498435afea9de491b5a2ae0c5876a4eb
parent	7e1a34386368d2bb3dc89bf5aa0519cafc326095 [diff] [blame]