Remove String.setCharAt().
The internal API String.setCharAt() breaks the assumption
that strings are really immutable. That in turn breaks
string compression invariants - compressible strings must
be compressed. This CL removes the String.setCharAt() API.
The method was used only in String.replace(char, char)
when we found a match, copying the string on first match.
Instead, introduce a new native method that does the whole
replacement with a single call when we find the first match.
StringReplaceBenchmark results on Nexus 6P, lower is better:
timeReplaceCharNonExistent/EMPTY 41.93 -> 38.25 (-9%)
timeReplaceCharNonExistent/L_16 114.90 -> 95.09 (-17%)
timeReplaceCharNonExistent/L_64 419.97 -> 320.65 (-24%)
timeReplaceCharNonExistent/L_256 1667.01 -> 1091.25 (-35%)
timeReplaceCharNonExistent/L_512 3253.50 -> 2075.62 (-36%)
timeReplaceCharRepeated/EMPTY 41.93 -> 39.58 (-6%)
timeReplaceCharRepeated/L_16 114.87 -> 95.40 (-17%)
timeReplaceCharRepeated/L_64 1267.29 -> 704.32 (-44%)
timeReplaceCharRepeated/L_256 5139.14 -> 1361.80 (-74%)
timeReplaceCharRepeated/L_512 10787.81 -> 2338.41 (-78%)
timeReplaceSingleChar/EMPTY 41.78 -> 37.16 (-11%)
timeReplaceSingleChar/L_16 449.54 -> 497.51 (+11%)
timeReplaceSingleChar/L_64 942.08 -> 891.35 (-5%)
timeReplaceSingleChar/L_256 2756.18 -> 2174.64 (-21%)
timeReplaceSingleChar/L_512 5489.91 -> 3983.32 (-27%)
Test: testrunner.py --host
Test: run-libcore-tests.sh --mode=host
Test: testrunner.py --host with string compression enabled.
Test: run-libcore-tests.sh --mode=host with string compression enabled.
Bug: 31040547
Change-Id: I9cf0d5457182f0a33ca8251c29931d3eb624ae07
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index af0478c..80554c2 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1330,17 +1330,18 @@
result->SetC(string->CharAt(index));
}
-// This allows setting chars from the new style of String objects during compilation.
-void UnstartedRuntime::UnstartedStringSetCharAt(
- Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset) {
- jint index = shadow_frame->GetVReg(arg_offset + 1);
- jchar c = shadow_frame->GetVReg(arg_offset + 2);
- mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+// This allows creating String objects with replaced characters during compilation.
+// String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
+void UnstartedRuntime::UnstartedStringDoReplace(
+ Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+ jchar old_c = shadow_frame->GetVReg(arg_offset + 1);
+ jchar new_c = shadow_frame->GetVReg(arg_offset + 2);
+ ObjPtr<mirror::String> string = shadow_frame->GetVRegReference(arg_offset)->AsString();
if (string == nullptr) {
- AbortTransactionOrFail(self, "String.setCharAt with null object");
+ AbortTransactionOrFail(self, "String.replaceWithMatch with null object");
return;
}
- string->SetCharAt(index, c);
+ result->SetL(string->DoReplace(self, old_c, new_c));
}
// This allows creating the new style of String objects during compilation.
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 6fc7989..e9435e4 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -63,7 +63,7 @@
V(RuntimeAvailableProcessors, "int java.lang.Runtime.availableProcessors()") \
V(StringGetCharsNoCheck, "void java.lang.String.getCharsNoCheck(int, int, char[], int)") \
V(StringCharAt, "char java.lang.String.charAt(int)") \
- V(StringSetCharAt, "void java.lang.String.setCharAt(int, char)") \
+ V(StringDoReplace, "java.lang.String java.lang.String.doReplace(char, char)") \
V(StringFactoryNewStringFromChars, "java.lang.String java.lang.StringFactory.newStringFromChars(int, int, char[])") \
V(StringFactoryNewStringFromString, "java.lang.String java.lang.StringFactory.newStringFromString(java.lang.String)") \
V(StringFastSubstring, "java.lang.String java.lang.String.fastSubstring(int, int)") \
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index c2407d7..57b20a1 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -36,7 +36,7 @@
namespace mirror {
inline uint32_t String::ClassSize(PointerSize pointer_size) {
- uint32_t vtable_entries = Object::kVTableLength + 57;
+ uint32_t vtable_entries = Object::kVTableLength + 56;
return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
}
@@ -311,9 +311,7 @@
inline bool String::AllASCII(const MemoryType* chars, const int length) {
static_assert(std::is_unsigned<MemoryType>::value, "Expecting unsigned MemoryType");
for (int i = 0; i < length; ++i) {
- // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
- // because it would complicate the detection of ASCII strings in Modified-UTF8.
- if ((chars[i] - 1u) >= 0x7fu) {
+ if (!IsASCII(chars[i])) {
return false;
}
}
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 0ab0bd6..884b88a 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -79,14 +79,55 @@
}
}
-void String::SetCharAt(int32_t index, uint16_t c) {
- DCHECK((index >= 0) && (index < GetLength()));
- if (IsCompressed()) {
- // TODO: Handle the case where String is compressed and c is non-ASCII
- GetValueCompressed()[index] = static_cast<uint8_t>(c);
- } else {
- GetValue()[index] = c;
+inline bool String::AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii) {
+ DCHECK(!IsASCII(non_ascii));
+ for (int32_t i = 0; i < length; ++i) {
+ if (!IsASCII(chars[i]) && chars[i] != non_ascii) {
+ return false;
+ }
}
+ return true;
+}
+
+ObjPtr<String> String::DoReplace(Thread* self, uint16_t old_c, uint16_t new_c) {
+ DCHECK(IsCompressed() ? ContainsElement(ArrayRef<uint8_t>(value_compressed_, GetLength()), old_c)
+ : ContainsElement(ArrayRef<uint16_t>(value_, GetLength()), old_c));
+ int32_t length = GetLength();
+ bool compressible =
+ kUseStringCompression &&
+ IsASCII(new_c) &&
+ (IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(value_, length, old_c)));
+ gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+ const int32_t length_with_flag = String::GetFlaggedCount(GetLength(), compressible);
+ SetStringCountVisitor visitor(length_with_flag);
+ ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
+ if (UNLIKELY(string == nullptr)) {
+ return nullptr;
+ }
+ if (compressible) {
+ auto replace = [old_c, new_c](uint16_t c) {
+ return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
+ };
+ uint8_t* out = string->value_compressed_;
+ if (LIKELY(IsCompressed())) { // LIKELY(compressible == IsCompressed())
+ std::transform(value_compressed_, value_compressed_ + length, out, replace);
+ } else {
+ std::transform(value_, value_ + length, out, replace);
+ }
+ DCHECK(kUseStringCompression && AllASCII(out, length));
+ } else {
+ auto replace = [old_c, new_c](uint16_t c) {
+ return (old_c != c) ? c : new_c;
+ };
+ uint16_t* out = string->value_;
+ if (UNLIKELY(IsCompressed())) { // LIKELY(compressible == IsCompressed())
+ std::transform(value_compressed_, value_compressed_ + length, out, replace);
+ } else {
+ std::transform(value_, value_ + length, out, replace);
+ }
+ DCHECK(!kUseStringCompression || !AllASCII(out, length));
+ }
+ return string;
}
String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 38f6dd4..35ce98e 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -94,7 +94,10 @@
uint16_t CharAt(int32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
- void SetCharAt(int32_t index, uint16_t c) REQUIRES_SHARED(Locks::mutator_lock_);
+ // Create a new string where all occurences of `old_c` are replaced with `new_c`.
+ // String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
+ ObjPtr<String> DoReplace(Thread* self, uint16_t old_c, uint16_t new_c)
+ REQUIRES_SHARED(Locks::mutator_lock_);
ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -229,6 +232,14 @@
REQUIRES_SHARED(Locks::mutator_lock_);
private:
+ static constexpr bool IsASCII(uint16_t c) {
+ // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
+ // because it would complicate the detection of ASCII strings in Modified-UTF8.
+ return (c - 1u) < 0x7fu;
+ }
+
+ static bool AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii);
+
void SetHashCode(int32_t new_hash_code) REQUIRES_SHARED(Locks::mutator_lock_) {
// Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
// a transaction.
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index f1d6ff5..1357338 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -99,9 +99,11 @@
return soa.AddLocalReference<jstring>(result);
}
-static void String_setCharAt(JNIEnv* env, jobject java_this, jint index, jchar c) {
+static jstring String_doReplace(JNIEnv* env, jobject java_this, jchar old_c, jchar new_c) {
ScopedFastNativeObjectAccess soa(env);
- soa.Decode<mirror::String>(java_this)->SetCharAt(index, c);
+ ObjPtr<mirror::String> result =
+ soa.Decode<mirror::String>(java_this)->DoReplace(soa.Self(), old_c, new_c);
+ return soa.AddLocalReference<jstring>(result);
}
static jcharArray String_toCharArray(JNIEnv* env, jobject java_this) {
@@ -114,11 +116,11 @@
NATIVE_METHOD(String, charAt, "!(I)C"),
NATIVE_METHOD(String, compareTo, "!(Ljava/lang/String;)I"),
NATIVE_METHOD(String, concat, "!(Ljava/lang/String;)Ljava/lang/String;"),
+ NATIVE_METHOD(String, doReplace, "!(CC)Ljava/lang/String;"),
NATIVE_METHOD(String, fastIndexOf, "!(II)I"),
NATIVE_METHOD(String, fastSubstring, "!(II)Ljava/lang/String;"),
NATIVE_METHOD(String, getCharsNoCheck, "!(II[CI)V"),
NATIVE_METHOD(String, intern, "!()Ljava/lang/String;"),
- NATIVE_METHOD(String, setCharAt, "!(IC)V"),
NATIVE_METHOD(String, toCharArray, "!()[C"),
};
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index dd89d64..e2a1001 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -33,7 +33,7 @@
14 (class java.lang.Short)
[java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.doReplace(char,char), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
[]
[interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
0