ART: Refactored regex code in Checker
This patch refactors how Checker constructs the regular expressions that
it uses to parse tests and verify compiler output. It also replaces
all occurrences of the '[0-9]+' ID-matching expression with the '\d+'
shorthand.
Change-Id: I5f854a25707e44ed2fa1673ff084990e8f43e4a2
diff --git a/tools/checker.py b/tools/checker.py
index 5e910ec..b1abea2 100755
--- a/tools/checker.py
+++ b/tools/checker.py
@@ -159,6 +159,23 @@
"""Supported language constructs."""
Text, Pattern, VarRef, VarDef = range(4)
+ rStartOptional = r"("
+ rEndOptional = r")?"
+
+ rName = r"([a-zA-Z][a-zA-Z0-9]*)"
+ rRegex = r"(.+?)"
+ rPatternStartSym = r"(\{\{)"
+ rPatternEndSym = r"(\}\})"
+ rVariableStartSym = r"(\[\[)"
+ rVariableEndSym = r"(\]\])"
+ rVariableSeparator = r"(:)"
+
+ regexPattern = rPatternStartSym + rRegex + rPatternEndSym
+ regexVariable = rVariableStartSym + \
+ rName + \
+ (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \
+ rVariableEndSym
+
def __init__(self, variant, name, pattern):
self.variant = variant
self.name = name
@@ -170,22 +187,21 @@
@staticmethod
def parsePattern(patternElem):
- return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:len(patternElem)-2])
+ return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])
@staticmethod
def parseVariable(varElem):
colonPos = varElem.find(":")
if colonPos == -1:
# Variable reference
- name = varElem[2:len(varElem)-2]
+ name = varElem[2:-2]
return CheckElement(CheckElement.Variant.VarRef, name, None)
else:
# Variable definition
name = varElem[2:colonPos]
- body = varElem[colonPos+1:len(varElem)-2]
+ body = varElem[colonPos+1:-2]
return CheckElement(CheckElement.Variant.VarDef, name, body)
-
class CheckLine(CommonEqualityMixin):
"""Representation of a single assertion in the check file formed of one or
more regex elements. Matching against an output line is successful only
@@ -226,24 +242,6 @@
starts = map(lambda m: len(string) if m is None else m.start(), matches)
return min(starts)
- # Returns the regex for finding a regex pattern in the check line.
- def __getPatternRegex(self):
- rStartSym = "\{\{"
- rEndSym = "\}\}"
- rBody = ".+?"
- return rStartSym + rBody + rEndSym
-
- # Returns the regex for finding a variable use in the check line.
- def __getVariableRegex(self):
- rStartSym = "\[\["
- rEndSym = "\]\]"
- rStartOptional = "("
- rEndOptional = ")?"
- rName = "[a-zA-Z][a-zA-Z0-9]*"
- rSeparator = ":"
- rBody = ".+?"
- return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym
-
# This method parses the content of a check line stripped of the initial
# comment symbol and the CHECK keyword.
def __parse(self, line):
@@ -251,9 +249,9 @@
# Loop as long as there is something to parse.
while line:
# Search for the nearest occurrence of the special markers.
- matchWhitespace = re.search("\s+", line)
- matchPattern = re.search(self.__getPatternRegex(), line)
- matchVariable = re.search(self.__getVariableRegex(), line)
+ matchWhitespace = re.search(r"\s+", line)
+ matchPattern = re.search(CheckElement.regexPattern, line)
+ matchVariable = re.search(CheckElement.regexVariable, line)
# If one of the above was identified at the current position, extract them
# from the line, parse them and add to the list of line parts.
@@ -262,7 +260,7 @@
# a whitespace, we add a regex pattern for an arbitrary non-zero number
# of whitespaces.
line = line[matchWhitespace.end():]
- lineParts.append(CheckElement.parsePattern("{{\s+}}"))
+ lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
elif self.__isMatchAtStart(matchPattern):
pattern = line[0:matchPattern.end()]
line = line[matchPattern.end():]
@@ -536,16 +534,16 @@
# followed by the CHECK keyword, given attribute and a colon at the very
# beginning of the line. Whitespaces are ignored.
def _extractLine(self, prefix, line):
- ignoreWhitespace = "\s*"
- commentSymbols = ["//", "#"]
- prefixRegex = ignoreWhitespace + \
- "(" + "|".join(commentSymbols) + ")" + \
- ignoreWhitespace + \
- prefix + ":"
+ rIgnoreWhitespace = r"\s*"
+ rCommentSymbols = [r"//", r"#"]
+ regexPrefix = rIgnoreWhitespace + \
+ r"(" + r"|".join(rCommentSymbols) + r")" + \
+ rIgnoreWhitespace + \
+ prefix + r":"
# The 'match' function succeeds only if the pattern is matched at the
# beginning of the line.
- match = re.match(prefixRegex, line)
+ match = re.match(regexPrefix, line)
if match is not None:
return line[match.end():].strip()
else: