blob: 28fc4f002b09073bd0f19c31ea7cad46381e594c [file] [log] [blame]
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.launcher3.search;
import android.text.TextUtils;
import com.android.launcher3.util.IntArray;
import java.text.Collator;
import java.util.stream.IntStream;
/**
* Utilities for matching query string to target string.
*/
public class StringMatcherUtility {
private static final Character SPACE = ' ';
/**
* Returns {@code true} if {@code query} is a prefix of a substring in {@code target}. How to
* break target to valid substring is defined in the given {@code matcher}.
*/
public static boolean matches(String query, String target, StringMatcher matcher) {
int queryLength = query.length();
int targetLength = target.length();
if (targetLength < queryLength || queryLength <= 0) {
return false;
}
if (requestSimpleFuzzySearch(query)) {
return target.toLowerCase().contains(query);
}
int lastType;
int thisType = Character.UNASSIGNED;
int nextType = Character.getType(target.codePointAt(0));
int end = targetLength - queryLength;
for (int i = 0; i <= end; i++) {
lastType = thisType;
thisType = nextType;
nextType = i < (targetLength - 1)
? Character.getType(target.codePointAt(i + 1)) : Character.UNASSIGNED;
if (matcher.isBreak(thisType, lastType, nextType)
&& matcher.matches(query, target.substring(i, i + queryLength))) {
return true;
}
}
return false;
}
/**
* Returns a list of breakpoints wherever the string contains a break. For example:
* "t-mobile" would have breakpoints at [0, 1]
* "Agar.io" would have breakpoints at [3, 4]
* "LEGO®Builder" would have a breakpoint at [4]
*/
public static IntArray getListOfBreakpoints(CharSequence input, StringMatcher matcher) {
int inputLength = input.length();
if ((inputLength <= 2) || TextUtils.indexOf(input, SPACE) != -1) {
// when there is a space in the string, return a list where the elements are the
// position of the spaces - 1. This is to make the logic consistent where breakpoints
// are placed
return IntArray.wrap(IntStream.range(0, inputLength)
.filter(i -> input.charAt(i) == SPACE)
.map(i -> i - 1)
.toArray());
}
IntArray listOfBreakPoints = new IntArray();
int prevType;
int thisType = Character.getType(Character.codePointAt(input, 0));
int nextType = Character.getType(Character.codePointAt(input, 1));
for (int i = 1; i < inputLength; i++) {
prevType = thisType;
thisType = nextType;
nextType = i < (inputLength - 1)
? Character.getType(Character.codePointAt(input, i + 1))
: Character.UNASSIGNED;
if (matcher.isBreak(thisType, prevType, nextType)) {
// breakpoint is at previous
listOfBreakPoints.add(i-1);
}
}
return listOfBreakPoints;
}
/**
* Performs locale sensitive string comparison using {@link Collator}.
*/
public static class StringMatcher {
private static final char MAX_UNICODE = '\uFFFF';
private final Collator mCollator;
StringMatcher() {
// On android N and above, Collator uses ICU implementation which has a much better
// support for non-latin locales.
mCollator = Collator.getInstance();
mCollator.setStrength(Collator.PRIMARY);
mCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
}
/**
* Returns true if {@param query} is a prefix of {@param target}
*/
public boolean matches(String query, String target) {
switch (mCollator.compare(query, target)) {
case 0:
return true;
case -1:
// The target string can contain a modifier which would make it larger than
// the query string (even though the length is same). If the query becomes
// larger after appending a unicode character, it was originally a prefix of
// the target string and hence should match.
return mCollator.compare(query + MAX_UNICODE, target) > -1;
default:
return false;
}
}
public static StringMatcher getInstance() {
return new StringMatcher();
}
/**
* Returns true if the current point should be a break point.
*
* Following cases are considered as break points:
* 1) Any non space character after a space character
* 2) Any digit after a non-digit character
* 3) Any capital character after a digit or small character
* 4) Any capital character before a small character
*
* E.g., "YouTube" matches the input "you" and "tube", but not "out".
*/
protected boolean isBreak(int thisType, int prevType, int nextType) {
switch (prevType) {
case Character.UNASSIGNED:
case Character.SPACE_SEPARATOR:
case Character.LINE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
return true;
}
switch (thisType) {
case Character.UPPERCASE_LETTER:
// takes care of the case where there are consistent uppercase letters as well
// as a special symbol following the capitalize letters for example: LEGO®
if (nextType != Character.UPPERCASE_LETTER && nextType != Character.OTHER_SYMBOL
&& nextType != Character.DECIMAL_DIGIT_NUMBER
&& nextType != Character.UNASSIGNED) {
return true;
}
// Follow through
case Character.TITLECASE_LETTER:
// Break point if previous was not a upper case
return prevType != Character.UPPERCASE_LETTER;
case Character.LOWERCASE_LETTER:
// Break point if previous was not a letter.
return prevType > Character.OTHER_LETTER || prevType <= Character.UNASSIGNED;
case Character.DECIMAL_DIGIT_NUMBER:
case Character.LETTER_NUMBER:
case Character.OTHER_NUMBER:
// Break point if previous was not a number
return !(prevType == Character.DECIMAL_DIGIT_NUMBER
|| prevType == Character.LETTER_NUMBER
|| prevType == Character.OTHER_NUMBER);
case Character.MATH_SYMBOL:
case Character.CURRENCY_SYMBOL:
case Character.OTHER_PUNCTUATION:
case Character.DASH_PUNCTUATION:
// Always a break point for a symbol
return true;
default:
return false;
}
}
}
/**
* Subclass of {@code StringMatcher} using simple space break for prefix matching.
* E.g., "YouTube" matches the input "you". "Play Store" matches the input "play".
*/
public static class StringMatcherSpace extends StringMatcher {
public static StringMatcherSpace getInstance() {
return new StringMatcherSpace();
}
/**
* The first character or any character after a space is considered as a break point.
* Returns true if the current point should be a break point.
*/
@Override
protected boolean isBreak(int thisType, int prevType, int nextType) {
return prevType == Character.UNASSIGNED || prevType == Character.SPACE_SEPARATOR;
}
}
/**
* Matching optimization to search in Chinese.
*/
private static boolean requestSimpleFuzzySearch(String s) {
for (int i = 0; i < s.length(); ) {
int codepoint = s.codePointAt(i);
i += Character.charCount(codepoint);
switch (Character.UnicodeScript.of(codepoint)) {
case HAN:
//Character.UnicodeScript.HAN: use String.contains to match
return true;
}
}
return false;
}
}