Remove String.setCharAt().
The internal API String.setCharAt() breaks the assumption
that strings are really immutable. That in turn breaks
string compression invariants - compressible strings must
be compressed. This CL removes the String.setCharAt() API.
The method was used only in String.replace(char, char)
when we found a match, copying the string on first match.
Instead, introduce a new native method that does the whole
replacement with a single call when we find the first match.
StringReplaceBenchmark results on Nexus 6P, lower is better:
timeReplaceCharNonExistent/EMPTY 41.93 -> 38.25 (-9%)
timeReplaceCharNonExistent/L_16 114.90 -> 95.09 (-17%)
timeReplaceCharNonExistent/L_64 419.97 -> 320.65 (-24%)
timeReplaceCharNonExistent/L_256 1667.01 -> 1091.25 (-35%)
timeReplaceCharNonExistent/L_512 3253.50 -> 2075.62 (-36%)
timeReplaceCharRepeated/EMPTY 41.93 -> 39.58 (-6%)
timeReplaceCharRepeated/L_16 114.87 -> 95.40 (-17%)
timeReplaceCharRepeated/L_64 1267.29 -> 704.32 (-44%)
timeReplaceCharRepeated/L_256 5139.14 -> 1361.80 (-74%)
timeReplaceCharRepeated/L_512 10787.81 -> 2338.41 (-78%)
timeReplaceSingleChar/EMPTY 41.78 -> 37.16 (-11%)
timeReplaceSingleChar/L_16 449.54 -> 497.51 (+11%)
timeReplaceSingleChar/L_64 942.08 -> 891.35 (-5%)
timeReplaceSingleChar/L_256 2756.18 -> 2174.64 (-21%)
timeReplaceSingleChar/L_512 5489.91 -> 3983.32 (-27%)
Test: testrunner.py --host
Test: run-libcore-tests.sh --mode=host
Test: testrunner.py --host with string compression enabled.
Test: run-libcore-tests.sh --mode=host with string compression enabled.
Bug: 31040547
Change-Id: I9cf0d5457182f0a33ca8251c29931d3eb624ae07
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index c2407d7..57b20a1 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -36,7 +36,7 @@
namespace mirror {
inline uint32_t String::ClassSize(PointerSize pointer_size) {
- uint32_t vtable_entries = Object::kVTableLength + 57;
+ uint32_t vtable_entries = Object::kVTableLength + 56;
return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
}
@@ -311,9 +311,7 @@
inline bool String::AllASCII(const MemoryType* chars, const int length) {
static_assert(std::is_unsigned<MemoryType>::value, "Expecting unsigned MemoryType");
for (int i = 0; i < length; ++i) {
- // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
- // because it would complicate the detection of ASCII strings in Modified-UTF8.
- if ((chars[i] - 1u) >= 0x7fu) {
+ if (!IsASCII(chars[i])) {
return false;
}
}
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 0ab0bd6..884b88a 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -79,14 +79,55 @@
}
}
-void String::SetCharAt(int32_t index, uint16_t c) {
- DCHECK((index >= 0) && (index < GetLength()));
- if (IsCompressed()) {
- // TODO: Handle the case where String is compressed and c is non-ASCII
- GetValueCompressed()[index] = static_cast<uint8_t>(c);
- } else {
- GetValue()[index] = c;
+inline bool String::AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii) {
+ DCHECK(!IsASCII(non_ascii));
+ for (int32_t i = 0; i < length; ++i) {
+ if (!IsASCII(chars[i]) && chars[i] != non_ascii) {
+ return false;
+ }
}
+ return true;
+}
+
+ObjPtr<String> String::DoReplace(Thread* self, uint16_t old_c, uint16_t new_c) {
+ DCHECK(IsCompressed() ? ContainsElement(ArrayRef<uint8_t>(value_compressed_, GetLength()), old_c)
+ : ContainsElement(ArrayRef<uint16_t>(value_, GetLength()), old_c));
+ int32_t length = GetLength();
+ bool compressible =
+ kUseStringCompression &&
+ IsASCII(new_c) &&
+ (IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(value_, length, old_c)));
+ gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+ const int32_t length_with_flag = String::GetFlaggedCount(GetLength(), compressible);
+ SetStringCountVisitor visitor(length_with_flag);
+ ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
+ if (UNLIKELY(string == nullptr)) {
+ return nullptr;
+ }
+ if (compressible) {
+ auto replace = [old_c, new_c](uint16_t c) {
+ return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
+ };
+ uint8_t* out = string->value_compressed_;
+ if (LIKELY(IsCompressed())) { // LIKELY(compressible == IsCompressed())
+ std::transform(value_compressed_, value_compressed_ + length, out, replace);
+ } else {
+ std::transform(value_, value_ + length, out, replace);
+ }
+ DCHECK(kUseStringCompression && AllASCII(out, length));
+ } else {
+ auto replace = [old_c, new_c](uint16_t c) {
+ return (old_c != c) ? c : new_c;
+ };
+ uint16_t* out = string->value_;
+ if (UNLIKELY(IsCompressed())) { // LIKELY(compressible == IsCompressed())
+ std::transform(value_compressed_, value_compressed_ + length, out, replace);
+ } else {
+ std::transform(value_, value_ + length, out, replace);
+ }
+ DCHECK(!kUseStringCompression || !AllASCII(out, length));
+ }
+ return string;
}
String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 38f6dd4..35ce98e 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -94,7 +94,10 @@
uint16_t CharAt(int32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
- void SetCharAt(int32_t index, uint16_t c) REQUIRES_SHARED(Locks::mutator_lock_);
+ // Create a new string where all occurences of `old_c` are replaced with `new_c`.
+ // String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
+ ObjPtr<String> DoReplace(Thread* self, uint16_t old_c, uint16_t new_c)
+ REQUIRES_SHARED(Locks::mutator_lock_);
ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -229,6 +232,14 @@
REQUIRES_SHARED(Locks::mutator_lock_);
private:
+ static constexpr bool IsASCII(uint16_t c) {
+ // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
+ // because it would complicate the detection of ASCII strings in Modified-UTF8.
+ return (c - 1u) < 0x7fu;
+ }
+
+ static bool AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii);
+
void SetHashCode(int32_t new_hash_code) REQUIRES_SHARED(Locks::mutator_lock_) {
// Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
// a transaction.