diff options
| -rw-r--r-- | core/java/android/os/PatternMatcher.java | 382 | ||||
| -rw-r--r-- | core/tests/coretests/src/android/os/OsTests.java | 1 | ||||
| -rw-r--r-- | core/tests/coretests/src/android/os/PatternMatcherTest.java | 234 |
3 files changed, 609 insertions, 8 deletions
diff --git a/core/java/android/os/PatternMatcher.java b/core/java/android/os/PatternMatcher.java index 56dc837549a7..3890fbfafb32 100644 --- a/core/java/android/os/PatternMatcher.java +++ b/core/java/android/os/PatternMatcher.java @@ -16,6 +16,10 @@ package android.os; +import android.util.Log; + +import java.util.Arrays; + /** * A simple pattern matcher, which is safe to use on untrusted data: it does * not provide full reg-exp support, only simple globbing that can not be @@ -44,13 +48,59 @@ public class PatternMatcher implements Parcelable { * wildcard part of a normal regexp. */ public static final int PATTERN_SIMPLE_GLOB = 2; - + + /** + * Pattern type: the given pattern is interpreted with a regular + * expression-like syntax for matching against the string it is tested + * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) + * with full support for character ranges and the not ({@code ^}) modifier. + * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) + * for one-or-more and full range ({@code {...}}) support. This is a simple + * evaulation implementation in which matching is done against the pattern in + * realtime with no backtracking support. + * + * {@hide} Pending approval for public API + */ + public static final int PATTERN_ADVANCED_GLOB = 3; + + // token types for advanced matching + private static final int TOKEN_TYPE_LITERAL = 0; + private static final int TOKEN_TYPE_ANY = 1; + private static final int TOKEN_TYPE_SET = 2; + private static final int TOKEN_TYPE_INVERSE_SET = 3; + + // Return for no match + private static final int NO_MATCH = -1; + + private static final String TAG = "PatternMatcher"; + + // Parsed placeholders for advanced patterns + private static final int PARSED_TOKEN_CHAR_SET_START = -1; + private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; + private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; + private static final int PARSED_TOKEN_CHAR_ANY = -4; + private static final int PARSED_MODIFIER_RANGE_START = -5; + private static final int PARSED_MODIFIER_RANGE_STOP = -6; + private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; + private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; + private final String mPattern; private final int mType; - + private final int[] mParsedPattern; + + + private static final int MAX_PATTERN_STORAGE = 2048; + // workspace to use for building a parsed advanced pattern; + private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; + public PatternMatcher(String pattern, int type) { mPattern = pattern; mType = type; + if (mType == PATTERN_ADVANCED_GLOB) { + mParsedPattern = parseAndVerifyAdvancedPattern(pattern); + } else { + mParsedPattern = null; + } } public final String getPath() { @@ -62,7 +112,7 @@ public class PatternMatcher implements Parcelable { } public boolean match(String str) { - return matchPattern(mPattern, str, mType); + return matchPattern(str, mPattern, mParsedPattern, mType); } public String toString() { @@ -77,6 +127,9 @@ public class PatternMatcher implements Parcelable { case PATTERN_SIMPLE_GLOB: type = "GLOB: "; break; + case PATTERN_ADVANCED_GLOB: + type = "ADVANCED: "; + break; } return "PatternMatcher{" + type + mPattern + "}"; } @@ -88,11 +141,13 @@ public class PatternMatcher implements Parcelable { public void writeToParcel(Parcel dest, int flags) { dest.writeString(mPattern); dest.writeInt(mType); + dest.writeIntArray(mParsedPattern); } public PatternMatcher(Parcel src) { mPattern = src.readString(); mType = src.readInt(); + mParsedPattern = src.createIntArray(); } public static final Parcelable.Creator<PatternMatcher> CREATOR @@ -106,16 +161,21 @@ public class PatternMatcher implements Parcelable { } }; - static boolean matchPattern(String pattern, String match, int type) { + static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { if (match == null) return false; if (type == PATTERN_LITERAL) { return pattern.equals(match); } if (type == PATTERN_PREFIX) { return match.startsWith(pattern); - } else if (type != PATTERN_SIMPLE_GLOB) { - return false; + } else if (type == PATTERN_SIMPLE_GLOB) { + return matchGlobPattern(pattern, match); + } else if (type == PATTERN_ADVANCED_GLOB) { + return matchAdvancedPattern(parsedPattern, match); } - + return false; + } + + static boolean matchGlobPattern(String pattern, String match) { final int NP = pattern.length(); if (NP <= 0) { return match.length() <= 0; @@ -194,4 +254,310 @@ public class PatternMatcher implements Parcelable { return false; } -} + + /** + * Parses the advanced pattern and returns an integer array representation of it. The integer + * array treats each field as a character if positive and a unique token placeholder if + * negative. This method will throw on any pattern structure violations. + */ + synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { + int ip = 0; + final int LP = pattern.length(); + + int it = 0; + + boolean inSet = false; + boolean inRange = false; + boolean inCharClass = false; + + boolean addToParsedPattern; + + while (ip < LP) { + if (it > MAX_PATTERN_STORAGE - 3) { + throw new IllegalArgumentException("Pattern is too large!"); + } + + char c = pattern.charAt(ip); + addToParsedPattern = false; + + switch (c) { + case '[': + if (inSet) { + addToParsedPattern = true; // treat as literal or char class in set + } else { + if (pattern.charAt(ip + 1) == '^') { + sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; + ip++; // skip over the '^' + } else { + sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; + } + ip++; // move to the next pattern char + inSet = true; + continue; + } + break; + case ']': + if (!inSet) { + addToParsedPattern = true; // treat as literal outside of set + } else { + int parsedToken = sParsedPatternScratch[it - 1]; + if (parsedToken == PARSED_TOKEN_CHAR_SET_START || + parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { + throw new IllegalArgumentException( + "You must define characters in a set."); + } + sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; + inSet = false; + inCharClass = false; + } + break; + case '{': + if (!inSet) { + if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { + throw new IllegalArgumentException("Modifier must follow a token."); + } + sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; + ip++; + inRange = true; + } + break; + case '}': + if (inRange) { // only terminate the range if we're currently in one + sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; + inRange = false; + } + break; + case '*': + if (!inSet) { + if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { + throw new IllegalArgumentException("Modifier must follow a token."); + } + sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; + } + break; + case '+': + if (!inSet) { + if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { + throw new IllegalArgumentException("Modifier must follow a token."); + } + sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; + } + break; + case '.': + if (!inSet) { + sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; + } + break; + case '\\': // escape + if (ip + 1 >= LP) { + throw new IllegalArgumentException("Escape found at end of pattern!"); + } + c = pattern.charAt(++ip); + addToParsedPattern = true; + break; + default: + addToParsedPattern = true; + break; + } + if (inSet) { + if (inCharClass) { + sParsedPatternScratch[it++] = c; + inCharClass = false; + } else { + // look forward for character class + if (ip + 2 < LP + && pattern.charAt(ip + 1) == '-' + && pattern.charAt(ip + 2) != ']') { + inCharClass = true; + sParsedPatternScratch[it++] = c; // set first token as lower end of range + ip++; // advance past dash + } else { // literal + sParsedPatternScratch[it++] = c; // set first token as literal + sParsedPatternScratch[it++] = c; // set second set as literal + } + } + } else if (inRange) { + int endOfSet = pattern.indexOf('}', ip); + if (endOfSet < 0) { + throw new IllegalArgumentException("Range not ended with '}'"); + } + String rangeString = pattern.substring(ip, endOfSet); + int commaIndex = rangeString.indexOf(','); + try { + final int rangeMin; + final int rangeMax; + if (commaIndex < 0) { + int parsedRange = Integer.parseInt(rangeString); + rangeMin = rangeMax = parsedRange; + } else { + rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); + if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) + rangeMax = Integer.MAX_VALUE; + } else { + rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); + } + } + if (rangeMin > rangeMax) { + throw new IllegalArgumentException( + "Range quantifier minimum is greater than maximum"); + } + sParsedPatternScratch[it++] = rangeMin; + sParsedPatternScratch[it++] = rangeMax; + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Range number format incorrect", e); + } + ip = endOfSet; + continue; // don't increment ip + } else if (addToParsedPattern) { + sParsedPatternScratch[it++] = c; + } + ip++; + } + if (inSet) { + throw new IllegalArgumentException("Set was not terminated!"); + } + return Arrays.copyOf(sParsedPatternScratch, it); + } + + private static boolean isParsedModifier(int parsedChar) { + return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || + parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || + parsedChar == PARSED_MODIFIER_RANGE_STOP || + parsedChar == PARSED_MODIFIER_RANGE_START; + } + + static boolean matchAdvancedPattern(int[] parsedPattern, String match) { + + // create indexes + int ip = 0, im = 0; + + // one-time length check + final int LP = parsedPattern.length, LM = match.length(); + + // The current character being analyzed in the pattern + int patternChar; + + int tokenType; + + int charSetStart = 0, charSetEnd = 0; + + while (ip < LP) { // we still have content in the pattern + + patternChar = parsedPattern[ip]; + // get the match type of the next verb + + switch (patternChar) { + case PARSED_TOKEN_CHAR_ANY: + tokenType = TOKEN_TYPE_ANY; + ip++; + break; + case PARSED_TOKEN_CHAR_SET_START: + case PARSED_TOKEN_CHAR_SET_INVERSE_START: + tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START + ? TOKEN_TYPE_SET + : TOKEN_TYPE_INVERSE_SET; + charSetStart = ip + 1; // start from the char after the set start + while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); + charSetEnd = ip - 1; // we're on the set stop, end is the previous + ip++; // move the pointer to the next pattern entry + break; + default: + charSetStart = ip; + tokenType = TOKEN_TYPE_LITERAL; + ip++; + break; + } + + final int minRepetition; + final int maxRepetition; + + // look for a match length modifier + if (ip >= LP) { + minRepetition = maxRepetition = 1; + } else { + patternChar = parsedPattern[ip]; + switch (patternChar) { + case PARSED_MODIFIER_ZERO_OR_MORE: + minRepetition = 0; + maxRepetition = Integer.MAX_VALUE; + ip++; + break; + case PARSED_MODIFIER_ONE_OR_MORE: + minRepetition = 1; + maxRepetition = Integer.MAX_VALUE; + ip++; + break; + case PARSED_MODIFIER_RANGE_START: + minRepetition = parsedPattern[++ip]; + maxRepetition = parsedPattern[++ip]; + ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token + break; + default: + minRepetition = maxRepetition = 1; // implied literal + break; + } + } + if (minRepetition > maxRepetition) { + return false; + } + + // attempt to match as many characters as possible + int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, + parsedPattern, charSetStart, charSetEnd); + + // if we found a conflict, return false immediately + if (matched == NO_MATCH) { + return false; + } + + // move the match pointer the number of characters matched + im += matched; + } + return ip >= LP && im >= LM; // have parsed entire string and regex + } + + private static int matchChars(String match, int im, final int lm, int tokenType, + int minRepetition, int maxRepetition, int[] parsedPattern, + int tokenStart, int tokenEnd) { + int matched = 0; + + while(matched < maxRepetition + && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, + tokenEnd)) { + matched++; + } + + return matched < minRepetition ? NO_MATCH : matched; + } + + private static boolean matchChar(String match, int im, final int lm, int tokenType, + int[] parsedPattern, int tokenStart, int tokenEnd) { + if (im >= lm) { // we've overrun the string, no match + return false; + } + switch (tokenType) { + case TOKEN_TYPE_ANY: + return true; + case TOKEN_TYPE_SET: + for (int i = tokenStart; i < tokenEnd; i += 2) { + char matchChar = match.charAt(im); + if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { + return true; + } + } + return false; + case TOKEN_TYPE_INVERSE_SET: + for (int i = tokenStart; i < tokenEnd; i += 2) { + char matchChar = match.charAt(im); + if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { + return false; + } + } + return true; + case TOKEN_TYPE_LITERAL: + return match.charAt(im) == parsedPattern[tokenStart]; + default: + return false; + } + } +}
\ No newline at end of file diff --git a/core/tests/coretests/src/android/os/OsTests.java b/core/tests/coretests/src/android/os/OsTests.java index 582bf1ae204a..985fa4f3cfc8 100644 --- a/core/tests/coretests/src/android/os/OsTests.java +++ b/core/tests/coretests/src/android/os/OsTests.java @@ -32,6 +32,7 @@ public class OsTests { suite.addTestSuite(IdleHandlerTest.class); suite.addTestSuite(MessageQueueTest.class); suite.addTestSuite(MessengerTest.class); + suite.addTestSuite(PatternMatcherTest.class); suite.addTestSuite(SystemPropertiesTest.class); return suite; diff --git a/core/tests/coretests/src/android/os/PatternMatcherTest.java b/core/tests/coretests/src/android/os/PatternMatcherTest.java new file mode 100644 index 000000000000..9645ccc11b76 --- /dev/null +++ b/core/tests/coretests/src/android/os/PatternMatcherTest.java @@ -0,0 +1,234 @@ +package android.os; + +import android.test.suitebuilder.annotation.SmallTest; +import junit.framework.TestCase; +import org.junit.runner.RunWith; +import org.junit.Test; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +@SmallTest +public class PatternMatcherTest extends TestCase{ + + @Test + public void testAdvancedPatternMatchesAnyToken() { + PatternMatcher matcher = new PatternMatcher(".", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertMatches("b", matcher); + assertNotMatches("", matcher); + } + + @Test + public void testAdvancedPatternMatchesSetToken() { + PatternMatcher matcher = new PatternMatcher("[a]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertNotMatches("b", matcher); + + matcher = new PatternMatcher("[.*+{}\\]\\\\[]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches(".", matcher); + assertMatches("*", matcher); + assertMatches("+", matcher); + assertMatches("{", matcher); + assertMatches("}", matcher); + assertMatches("]", matcher); + assertMatches("\\", matcher); + assertMatches("[", matcher); + } + + @Test + public void testAdvancedPatternMatchesSetCharacterClassToken() { + PatternMatcher matcher = new PatternMatcher("[a-z]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertMatches("b", matcher); + assertNotMatches("A", matcher); + assertNotMatches("1", matcher); + + matcher = new PatternMatcher("[a-z][0-9]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a1", matcher); + assertNotMatches("1a", matcher); + assertNotMatches("aa", matcher); + + matcher = new PatternMatcher("[z-a]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertNotMatches("a", matcher); + assertNotMatches("z", matcher); + assertNotMatches("A", matcher); + + matcher = new PatternMatcher("[^0-9]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertMatches("z", matcher); + assertMatches("A", matcher); + assertNotMatches("9", matcher); + assertNotMatches("5", matcher); + assertNotMatches("0", matcher); + + assertPoorlyFormattedPattern("[]a]"); + matcher = new PatternMatcher("[\\[a]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertMatches("[", matcher); + } + + @Test + public void testAdvancedPatternMatchesEscapedCharacters() { + PatternMatcher matcher = new PatternMatcher("\\.", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches(".", matcher); + assertNotMatches("a", matcher); + assertNotMatches("1", matcher); + + matcher = new PatternMatcher("a\\+", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a+", matcher); + assertNotMatches("a", matcher); + assertNotMatches("aaaaa", matcher); + + matcher = new PatternMatcher("[\\a-\\z]", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertMatches("a", matcher); + assertMatches("z", matcher); + assertNotMatches("A", matcher); + } + + @Test + public void testAdvancedPatternMatchesLiteralTokens() { + PatternMatcher matcher = new PatternMatcher("a", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertNotMatches("", matcher); + assertMatches("a", matcher); + assertNotMatches("z", matcher); + + matcher = new PatternMatcher("az", PatternMatcher.PATTERN_ADVANCED_GLOB); + assertNotMatches("", matcher); + assertMatches("az", matcher); + assertNotMatches("za", matcher); + } + + @Test + public void testAdvancedPatternMatchesSetZeroOrMore() { + PatternMatcher matcher = new PatternMatcher("[a-z]*", PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertMatches("", matcher); + assertMatches("a", matcher); + assertMatches("abcdefg", matcher); + assertNotMatches("abc1", matcher); + assertNotMatches("1abc", matcher); + } + + @Test + public void testAdvancedPatternMatchesSetOneOrMore() { + PatternMatcher matcher = new PatternMatcher("[a-z]+", PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertNotMatches("", matcher); + assertMatches("a", matcher); + assertMatches("abcdefg", matcher); + assertNotMatches("abc1", matcher); + assertNotMatches("1abc", matcher); + } + + + @Test + public void testAdvancedPatternMatchesSingleRange() { + PatternMatcher matcher = new PatternMatcher("[a-z]{1}", + PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertNotMatches("", matcher); + assertMatches("a", matcher); + assertMatches("z", matcher); + assertNotMatches("1", matcher); + assertNotMatches("aa", matcher); + } + + @Test + public void testAdvancedPatternMatchesFullRange() { + PatternMatcher matcher = new PatternMatcher("[a-z]{1,5}", + PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertNotMatches("", matcher); + assertMatches("a", matcher); + assertMatches("zazaz", matcher); + assertNotMatches("azazaz", matcher); + assertNotMatches("11111", matcher); + } + + @Test + public void testAdvancedPatternMatchesPartialRange() { + PatternMatcher matcher = new PatternMatcher("[a-z]{3,}", + PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertNotMatches("", matcher); + assertMatches("aza", matcher); + assertMatches("zazaz", matcher); + assertMatches("azazazazazaz", matcher); + assertNotMatches("aa", matcher); + } + + @Test + public void testAdvancedPatternMatchesComplexPatterns() { + PatternMatcher matcher = new PatternMatcher( + "/[0-9]{4}/[0-9]{2}/[0-9]{2}/[a-zA-Z0-9_]+\\.html", + PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertNotMatches("", matcher); + assertMatches("/2016/09/07/got_this_working.html", matcher); + assertMatches("/2016/09/07/got_this_working2.html", matcher); + assertNotMatches("/2016/09/07/got_this_working2dothtml", matcher); + assertNotMatches("/2016/9/7/got_this_working.html", matcher); + + matcher = new PatternMatcher( + "/b*a*bar.*", + PatternMatcher.PATTERN_ADVANCED_GLOB); + + assertMatches("/babar", matcher); + assertMatches("/babarfff", matcher); + assertMatches("/bbaabarfff", matcher); + assertMatches("/babar?blah", matcher); + assertMatches("/baaaabar?blah", matcher); + assertNotMatches("?bar", matcher); + assertNotMatches("/bar", matcher); + assertNotMatches("/baz", matcher); + assertNotMatches("/ba/bar", matcher); + assertNotMatches("/barf", matcher); + assertNotMatches("/", matcher); + assertNotMatches("?blah", matcher); + } + + @Test + public void testAdvancedPatternPoorFormatThrowsIllegalArgumentException() { + assertPoorlyFormattedPattern("[a-z"); + assertPoorlyFormattedPattern("a{,4}"); + assertPoorlyFormattedPattern("a{0,a}"); + assertPoorlyFormattedPattern("a{\\1, 2}"); + assertPoorlyFormattedPattern("[]"); + assertPoorlyFormattedPattern("a{}"); + assertPoorlyFormattedPattern("{3,4}"); + assertPoorlyFormattedPattern("a+{3,4}"); + assertPoorlyFormattedPattern("*."); + assertPoorlyFormattedPattern(".+*"); + assertPoorlyFormattedPattern("a{3,4"); + assertPoorlyFormattedPattern("[a"); + assertPoorlyFormattedPattern("abc\\"); + assertPoorlyFormattedPattern("+."); + + StringBuilder charSet = new StringBuilder("["); + for (int i = 0; i < 1024; i++) { + charSet.append('a' + (i % 26)); + } + charSet.append("]"); + assertPoorlyFormattedPattern(charSet.toString()); + } + + private void assertMatches(String string, PatternMatcher matcher) { + assertTrue("'" + string + "' should match '" + matcher.toString() + "'", + matcher.match(string)); + } + + private void assertNotMatches(String string, PatternMatcher matcher) { + assertTrue("'" + string + "' should not match '" + matcher.toString() + "'", + !matcher.match(string)); + } + + private void assertPoorlyFormattedPattern(String format) { + try { + new PatternMatcher(format, PatternMatcher.PATTERN_ADVANCED_GLOB); + } catch (IllegalArgumentException e) { + return;// expected + } + + fail("'" + format + "' was erroneously created"); + } +} |