Optimize some commonly used utf8 functions by:
- using counted loops instead of searching for the terminating null. In
the important cases the caller already knows the length, so the API
is changed to pass it in. The old API versions are kept as well to
avoid extensive changes to non-critical debug and test code.
- ensuring the common cases are at the start of if/then/else chains.
Usually 99+% of characters are ASCII, even in mixed strings.
- using a specialized loop in the "convert" functions when both the
utf8 and utf16 lengths are passed and are equal, which means the
entire string is ASCII. The compiler might then unroll or even
vectorize this loop.
The improved functions are (tested on a Nexus 5 with a 44-character
ASCII string):
CountModifiedUtf8Chars : 20% faster
ConvertUtf16ToModifiedUtf8: 80% faster
ConvertModifiedUtf8ToUtf16: 200% faster
Also, for completeness, CountUtf8Bytes has been cleaned up a little, but
its speed is unchanged. Unlike CountModifiedUtf8Chars, it was already
passed the length rather than searching for the terminating null.
Change-Id: I1c9b7dea3eda869fc9f5f6b4dd6be8cdd5bc3ac0
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index be869d4..33aca03 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -109,12 +109,17 @@
String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
DCHECK(utf != nullptr);
- size_t char_count = CountModifiedUtf8Chars(utf);
- return AllocFromModifiedUtf8(self, char_count, utf);
+ size_t byte_count = strlen(utf);
+ size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+ return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) {
+ return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
}
String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
- const char* utf8_data_in) {
+ const char* utf8_data_in, int32_t utf8_length) {
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
SetStringCountVisitor visitor(utf16_length);
String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
@@ -122,7 +127,7 @@
return nullptr;
}
uint16_t* utf16_data_out = string->GetValue();
- ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+ ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
return string;
}
@@ -217,7 +222,7 @@
const uint16_t* chars = GetValue();
size_t byte_count = GetUtfLength();
std::string result(byte_count, static_cast<char>(0));
- ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+ ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
return result;
}
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 80ebd2c..e2cfb8d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -116,6 +116,10 @@
static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+ static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+ const char* utf8_data_in, int32_t utf8_length)
+ SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);