diff options

| field | value |
|---|---|
| author | 2022-02-23 11:20:26 +0000 |
| committer | 2022-02-24 16:29:37 +0000 |
| commit | 4625f2510ba611459250f1f715faeb2efa2cfb14 (patch) |
| tree | de2222482ed8cbb798ff010bd978ae9c6531c46e |
| parent | eaeaa4f368817fafb181013268f2e786ca0e7a60 (diff) |

Avoid Modified-UTF8 processing for ASCII strings.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: I9926a0d3e0160aa56ba7a02922388bb3007aaccb

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | dex2oat/linker/image_writer.cc | 5 |
| -rw-r--r-- | libdexfile/dex/utf.h | 11 |
| -rw-r--r-- | runtime/intern_table-inl.h | 30 |
| -rw-r--r-- | runtime/intern_table.cc | 35 |
| -rw-r--r-- | runtime/intern_table.h | 4 |
| -rw-r--r-- | runtime/mirror/string.h | 2 |

6 files changed, 59 insertions, 28 deletions

```diff
diff --git a/dex2oat/linker/image_writer.cc b/dex2oat/linker/image_writer.cc
index 0a6566ce4a..331b8b7c5e 100644
--- a/dex2oat/linker/image_writer.cc
+++ b/dex2oat/linker/image_writer.cc
@@ -2019,9 +2019,8 @@ void ImageWriter::LayoutHelper::ProcessInterns(Thread* self) {
       uint32_t utf16_length;
       const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i),
                                                                       &utf16_length);
-      int32_t hash = ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length);
-      InternTable::Utf8String utf8_string(utf16_length, utf8_data, hash);
-      auto intern_it = intern_set.find(utf8_string);
+      int32_t hash = InternTable::Utf8String::Hash(utf16_length, utf8_data);
+      auto intern_it = intern_set.find(InternTable::Utf8String(utf16_length, utf8_data, hash));
       if (intern_it != intern_set.end()) {
         mirror::String* string = intern_it->Read<kWithoutReadBarrier>();
         DCHECK(string != nullptr);
diff --git a/libdexfile/dex/utf.h b/libdexfile/dex/utf.h
index 6949319b26..35cbf78463 100644
--- a/libdexfile/dex/utf.h
+++ b/libdexfile/dex/utf.h
@@ -17,13 +17,14 @@
 #ifndef ART_LIBDEXFILE_DEX_UTF_H_
 #define ART_LIBDEXFILE_DEX_UTF_H_
 
-#include "base/macros.h"
-
 #include <stddef.h>
 #include <stdint.h>
 
 #include <string>
 #include <string_view>
+#include <type_traits>
+
+#include "base/macros.h"
 
 /*
  * All UTF-8 in art is actually modified UTF-8. Mostly, this distinction
@@ -97,9 +98,13 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
  */
 template<typename MemoryType>
 int32_t ComputeUtf16Hash(const MemoryType* chars, size_t char_count) {
+  static_assert(std::is_same_v<MemoryType, char> ||
+                std::is_same_v<MemoryType, uint8_t> ||
+                std::is_same_v<MemoryType, uint16_t>);
+  using UnsignedMemoryType = std::make_unsigned_t<MemoryType>;
   uint32_t hash = 0;
   while (char_count--) {
-    hash = hash * 31 + *chars++;
+    hash = hash * 31 + static_cast<UnsignedMemoryType>(*chars++);
   }
   return static_cast<int32_t>(hash);
 }
diff --git a/runtime/intern_table-inl.h b/runtime/intern_table-inl.h
index 44bdb1fe19..a1319f1de7 100644
--- a/runtime/intern_table-inl.h
+++ b/runtime/intern_table-inl.h
@@ -28,6 +28,17 @@
 
 namespace art {
 
+inline int32_t InternTable::Utf8String::Hash(uint32_t utf16_length, const char* utf8_data) {
+  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+  if (LIKELY(utf8_data[utf16_length] == 0)) {
+    int32_t hash = ComputeUtf16Hash(utf8_data, utf16_length);
+    DCHECK_EQ(hash, ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+    return hash;
+  } else {
+    return ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length);
+  }
+}
+
 inline std::size_t InternTable::StringHash::operator()(const GcRoot<mirror::String>& root) const {
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -55,19 +66,16 @@ inline bool InternTable::StringEquals::operator()(const GcRoot<mirror::String>&
   if (a_length != b.GetUtf16Length()) {
     return false;
   }
+  DCHECK_GE(strlen(b.GetUtf8Data()), a_length);
   if (a_string->IsCompressed()) {
-    size_t b_byte_count = strlen(b.GetUtf8Data());
-    size_t b_utf8_length = CountModifiedUtf8Chars(b.GetUtf8Data(), b_byte_count);
-    // Modified UTF-8 single byte character range is 0x01 .. 0x7f
+    // Modified UTF-8 single byte character range is 0x01 .. 0x7f.
     // The string compression occurs on regular ASCII with same exact range,
-    // not on extended ASCII which up to 0xff
-    const bool is_b_regular_ascii = (b_byte_count == b_utf8_length);
-    if (is_b_regular_ascii) {
-      return memcmp(b.GetUtf8Data(),
-                    a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0;
-    } else {
-      return false;
-    }
+    // not on extended ASCII which is up to 0xff.
+    return b.GetUtf8Data()[a_length] == 0 &&
+        memcmp(b.GetUtf8Data(), a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0;
+  } else if (mirror::kUseStringCompression && b.GetUtf8Data()[a_length] == 0) {
+    // ASCII string `b` cannot equal non-ASCII `a_string`.
+    return false;
   } else {
     const uint16_t* a_value = a_string->GetValue();
     return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0;
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 4da545396f..c36bc8a811 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -103,12 +103,9 @@ ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self, ObjPtr<mirror::St
 ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self,
                                                  uint32_t utf16_length,
                                                  const char* utf8_data) {
-  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
-  Utf8String string(utf16_length,
-                    utf8_data,
-                    ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  int32_t hash = Utf8String::Hash(utf16_length, utf8_data);
   MutexLock mu(self, *Locks::intern_table_lock_);
-  return strong_interns_.Find(string);
+  return strong_interns_.Find(Utf8String(utf16_length, utf8_data, hash));
 }
 
 ObjPtr<mirror::String> InternTable::LookupWeakLocked(ObjPtr<mirror::String> s) {
@@ -254,16 +251,34 @@ ObjPtr<mirror::String> InternTable::Insert(ObjPtr<mirror::String> s,
   return is_strong ? InsertStrong(s) : InsertWeak(s);
 }
 
-ObjPtr<mirror::String> InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(uint32_t utf16_length, const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
+  int32_t hash = Utf8String::Hash(utf16_length, utf8_data);
   Thread* self = Thread::Current();
-  // Try to avoid allocation.
-  ObjPtr<mirror::String> s = LookupStrong(self, utf16_length, utf8_data);
+  ObjPtr<mirror::String> s;
+  {
+    // Try to avoid allocation. If we need to allocate, release the mutex before the allocation.
+    MutexLock mu(self, *Locks::intern_table_lock_);
+    s = strong_interns_.Find(Utf8String(utf16_length, utf8_data, hash));
+  }
   if (s != nullptr) {
     return s;
   }
-  return InternStrong(mirror::String::AllocFromModifiedUtf8(
-      self, utf16_length, utf8_data));
+  bool is_ascii = (utf8_data[utf16_length] == 0);
+  int32_t utf8_length = utf16_length + (LIKELY(is_ascii) ? 0 : strlen(utf8_data + utf16_length));
+  DCHECK_EQ(static_cast<size_t>(utf8_length), strlen(utf8_data));
+  s = mirror::String::AllocFromModifiedUtf8(self, utf16_length, utf8_data, utf8_length);
+  if (UNLIKELY(s == nullptr)) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  if (kIsDebugBuild) {
+    int32_t string_hash = s->GetHashCode(); // Implicitly sets the hash code.
+    CHECK_EQ(hash, string_hash);
+  } else {
+    s->SetHashCode(hash);
+  }
+  return InternStrong(s);
 }
 
 ObjPtr<mirror::String> InternTable::InternStrong(const char* utf8_data) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index c5fe797ff8..ba039cc22d 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -66,6 +66,8 @@ class InternTable {
     uint32_t GetUtf16Length() const { return utf16_length_; }
     const char* GetUtf8Data() const { return utf8_data_; }
 
+    static int32_t Hash(uint32_t utf16_length, const char* utf8_data);
+
    private:
     int32_t hash_;
     uint32_t utf16_length_;
@@ -112,7 +114,7 @@ class InternTable {
   InternTable();
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  ObjPtr<mirror::String> InternStrong(int32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> InternStrong(uint32_t utf16_length, const char* utf8_data)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6cb560e4a2..66e0151add 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -29,6 +29,7 @@ enum AllocatorType : char;
 } // namespace gc
 
 template<class T> class Handle;
+class InternTable;
 template<class MirrorType> class ObjPtr;
 class StringBuilderAppend;
 struct StringOffsets;
@@ -277,6 +278,7 @@ class MANAGED String final : public Object {
     uint8_t value_compressed_[0];
   };
 
+  friend class art::InternTable; // Let `InternTable` call `SetHashCode()`.
   friend class art::StringBuilderAppend;
   friend struct art::StringOffsets; // for verifying offset information
 
```