blob: d80fc53c3f4b73d785a4e59e3db763802896e441 [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.contacts.util;
import com.google.common.annotations.VisibleForTesting;
/**
 * Static helpers for locating a search query inside contact data.
 *
 * <p>Matching is token-prefix based: a token is a maximal run of letters and digits (as defined
 * by {@link Character#isLetterOrDigit(int)}); every other character is a delimiter. All scanning
 * is done per Unicode code point so that supplementary characters (surrogate pairs) are never
 * split.
 */
public class SearchUtil {

    /**
     * Result holder for {@link #findMatchingLine(String, String)}.
     *
     * <p>When nothing matched, {@link #line} is {@code null} and {@link #startIndex} is
     * {@code -1}.
     */
    public static class MatchedLine {

        /** Offset (in UTF-16 units) of the match within {@link #line}; -1 if no match. */
        public int startIndex = -1;

        /** The line (without its terminating '\n') that contains the match; null if no match. */
        public String line;

        @Override
        public String toString() {
            return "MatchedLine{line='" + line + "', startIndex=" + startIndex + "}";
        }
    }

    /**
     * Given a string with lines delimited with '\n', finds the line that contains the first
     * token-prefix match of the given substring.
     *
     * @param contents The string to search.
     * @param substring The substring to search for.
     * @return A MatchedLine object containing the matching line and the startIndex of the
     *         substring match within that line. If there is no match, the returned object keeps
     *         its defaults (line == null, startIndex == -1).
     */
    public static MatchedLine findMatchingLine(String contents, String substring) {
        final MatchedLine matched = new MatchedLine();

        // The snippet may contain multiple lines separated by "\n"; find where the match starts.
        final int index = SearchUtil.contains(contents, substring);
        if (index == -1) {
            return matched;
        }

        // Nearest newline strictly before the match; -1 means the match is on the first line.
        final int lineStart = contents.lastIndexOf('\n', index - 1) + 1;

        // Nearest newline strictly after the first matched character; none means last line.
        final int newlineAfter = contents.indexOf('\n', index + 1);
        final int lineEnd = (newlineAfter == -1) ? contents.length() : newlineAfter;

        matched.line = contents.substring(lineStart, lineEnd);
        matched.startIndex = index - lineStart;
        return matched;
    }

    /**
     * Similar to String.contains() with these differences:
     * <p>
     * 1) Only searches token prefixes. A token is defined as any combination of letters or
     * numbers. (Position 0 of {@code value} is always tried, even if it is a delimiter.)
     * <p>
     * 2) Returns the starting index where the substring is found.
     * <p>
     * 3) The comparison ignores case: both strings are folded with
     * {@link Character#toLowerCase(int)} code point by code point.
     *
     * @param value The string to search.
     * @param substring The substring to look for.
     * @return The starting index where the substring is found. {@literal -1} if substring is not
     *         found in value.
     */
    @VisibleForTesting
    static int contains(String value, String substring) {
        if (value.length() < substring.length()) {
            return -1;
        }

        // i18n support: decode the substring into (lower-cased) code points once.
        // There are at most substring.length() code points, possibly fewer when surrogate pairs
        // are present, so the real count is tracked separately from the array length.
        final int[] substringCodePoints = new int[substring.length()];
        int substringLength = 0; // may not equal substring.length()!!
        for (int i = 0; i < substring.length(); ) {
            final int codePoint = substring.codePointAt(i);
            // Fold case here as well; value is folded below, so an un-folded query containing an
            // upper-case letter could otherwise never match.
            substringCodePoints[substringLength++] = Character.toLowerCase(codePoint);
            i += Character.charCount(codePoint);
        }

        // Try index 0 and then the start of every following token.
        for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
            int numMatch = 0;
            int j = i;
            while (j < value.length() && numMatch < substringLength) {
                final int valueCp = value.codePointAt(j);
                if (Character.toLowerCase(valueCp) != substringCodePoints[numMatch]) {
                    break;
                }
                // Advance by the width of the code point actually read from value.
                j += Character.charCount(valueCp);
                numMatch++;
            }
            if (numMatch == substringLength) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Find the start of the next token. A token is composed of letters and numbers. Any other
     * characters are considered delimiters.
     *
     * @param line The string to search for the next token.
     * @param startIndex The index to start searching. 0 based indexing.
     * @return The index for the start of the next token. line.length() if next token not found.
     */
    @VisibleForTesting
    static int findNextTokenStart(String line, int startIndex) {
        final int length = line.length();
        int index = startIndex;

        // If already inside a token, eat the remainder of that token.
        while (index < length) {
            final int codePoint = line.codePointAt(index);
            if (!Character.isLetterOrDigit(codePoint)) {
                break;
            }
            index += Character.charCount(codePoint);
        }

        // Out of the token: eat all consecutive delimiters.
        while (index < length) {
            final int codePoint = line.codePointAt(index);
            if (Character.isLetterOrDigit(codePoint)) {
                break;
            }
            index += Character.charCount(codePoint);
        }

        // Either the first character of the next token, or the end of the string.
        return index;
    }

    /**
     * Anything other than letters and numbers is considered a delimiter. Removes leading and
     * trailing delimiters since they are not relevant to search. Delimiters between the first
     * and last letter/digit are kept.
     *
     * @param query The query string to clean.
     * @return The cleaned query. Empty string if all characters are cleaned out.
     */
    public static String cleanStartAndEndOfSearchQuery(String query) {
        final int length = query.length();

        // Skip leading delimiters; start ends on the first letter/digit (if any).
        int start = 0;
        while (start < length) {
            final int codePoint = query.codePointAt(start);
            if (Character.isLetterOrDigit(codePoint)) {
                break;
            }
            start += Character.charCount(codePoint);
        }
        if (start == length) {
            // All characters are delimiters.
            return "";
        }

        // Skip trailing delimiters. 'end' is an exclusive index and we look at the code point
        // *before* it, so a trailing surrogate pair is consumed or kept as a whole unit instead
        // of being cut after its high surrogate. A letter/digit exists at 'start', so the loop
        // always stops with end > start.
        int end = length;
        while (end > start) {
            final int codePoint = query.codePointBefore(end);
            if (Character.isLetterOrDigit(codePoint)) {
                break;
            }
            end -= Character.charCount(codePoint);
        }
        return query.substring(start, end);
    }
}