Avoid Modified-UTF8 processing for ASCII strings.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: I9926a0d3e0160aa56ba7a02922388bb3007aaccb
diff --git a/dex2oat/linker/image_writer.cc b/dex2oat/linker/image_writer.cc
index 0a6566c..331b8b7 100644
--- a/dex2oat/linker/image_writer.cc
+++ b/dex2oat/linker/image_writer.cc
@@ -2019,9 +2019,8 @@
       uint32_t utf16_length;
       const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i),
                                                                       &utf16_length);
-      int32_t hash = ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length);
-      InternTable::Utf8String utf8_string(utf16_length, utf8_data, hash);
-      auto intern_it = intern_set.find(utf8_string);
+      int32_t hash = InternTable::Utf8String::Hash(utf16_length, utf8_data);
+      auto intern_it = intern_set.find(InternTable::Utf8String(utf16_length, utf8_data, hash));
       if (intern_it != intern_set.end()) {
         mirror::String* string = intern_it->Read<kWithoutReadBarrier>();
         DCHECK(string != nullptr);
diff --git a/libdexfile/dex/utf.h b/libdexfile/dex/utf.h
index 6949319..35cbf78 100644
--- a/libdexfile/dex/utf.h
+++ b/libdexfile/dex/utf.h
@@ -17,13 +17,14 @@
 #ifndef ART_LIBDEXFILE_DEX_UTF_H_
 #define ART_LIBDEXFILE_DEX_UTF_H_
 
-#include "base/macros.h"
-
 #include <stddef.h>
 #include <stdint.h>
 
 #include <string>
 #include <string_view>
+#include <type_traits>
+
+#include "base/macros.h"
 
 /*
  * All UTF-8 in art is actually modified UTF-8. Mostly, this distinction
@@ -97,9 +98,13 @@
  */
 template<typename MemoryType>
 int32_t ComputeUtf16Hash(const MemoryType* chars, size_t char_count) {
+  static_assert(std::is_same_v<MemoryType, char> ||
+                std::is_same_v<MemoryType, uint8_t> ||
+                std::is_same_v<MemoryType, uint16_t>);  // Only these element types compile.
+  using UnsignedMemoryType = std::make_unsigned_t<MemoryType>;  // `unsigned char` for `char`.
   uint32_t hash = 0;
   while (char_count--) {
-    hash = hash * 31 + *chars++;
+    hash = hash * 31 + static_cast<UnsignedMemoryType>(*chars++);  // No sign extension.
   }
   return static_cast<int32_t>(hash);
 }
diff --git a/runtime/intern_table-inl.h b/runtime/intern_table-inl.h
index 44bdb1f..a1319f1 100644
--- a/runtime/intern_table-inl.h
+++ b/runtime/intern_table-inl.h
@@ -28,6 +28,17 @@
 
 namespace art {
 
+inline int32_t InternTable::Utf8String::Hash(uint32_t utf16_length, const char* utf8_data) {
+  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+  if (UNLIKELY(utf8_data[utf16_length] != 0)) {  // More bytes than chars: multi-byte data.
+    return ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length);
+  }
+  // One byte per character, so the raw bytes can be hashed without decoding.
+  int32_t ascii_hash = ComputeUtf16Hash(utf8_data, utf16_length);
+  DCHECK_EQ(ascii_hash, ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  return ascii_hash;
+}
+
 inline std::size_t InternTable::StringHash::operator()(const GcRoot<mirror::String>& root) const {
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -55,19 +66,16 @@
   if (a_length != b.GetUtf16Length()) {
     return false;
   }
+  DCHECK_GE(strlen(b.GetUtf8Data()), a_length);
   if (a_string->IsCompressed()) {
-    size_t b_byte_count = strlen(b.GetUtf8Data());
-    size_t b_utf8_length = CountModifiedUtf8Chars(b.GetUtf8Data(), b_byte_count);
-    // Modified UTF-8 single byte character range is 0x01 .. 0x7f
+    // Modified UTF-8 single byte character range is 0x01 .. 0x7f.
     // The string compression occurs on regular ASCII with same exact range,
-    // not on extended ASCII which up to 0xff
-    const bool is_b_regular_ascii = (b_byte_count == b_utf8_length);
-    if (is_b_regular_ascii) {
-      return memcmp(b.GetUtf8Data(),
-                    a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0;
-    } else {
-      return false;
-    }
+    // not on extended ASCII which is up to 0xff.
+    return b.GetUtf8Data()[a_length] == 0 &&
+           memcmp(b.GetUtf8Data(), a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0;
+  } else if (mirror::kUseStringCompression && b.GetUtf8Data()[a_length] == 0) {
+    // ASCII string `b` cannot equal non-ASCII `a_string`.
+    return false;
   } else {
     const uint16_t* a_value = a_string->GetValue();
     return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0;
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 4da5453..c36bc8a 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -103,12 +103,9 @@
 ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self,
                                                  uint32_t utf16_length,
                                                  const char* utf8_data) {
-  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
-  Utf8String string(utf16_length,
-                    utf8_data,
-                    ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  int32_t hash = Utf8String::Hash(utf16_length, utf8_data);  // Computed before taking the lock.
   MutexLock mu(self, *Locks::intern_table_lock_);
-  return strong_interns_.Find(string);
+  return strong_interns_.Find(Utf8String(utf16_length, utf8_data, hash));
 }
 
 ObjPtr<mirror::String> InternTable::LookupWeakLocked(ObjPtr<mirror::String> s) {
@@ -254,16 +251,34 @@
   return is_strong ? InsertStrong(s) : InsertWeak(s);
 }
 
-ObjPtr<mirror::String> InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(uint32_t utf16_length, const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
+  int32_t hash = Utf8String::Hash(utf16_length, utf8_data);  // Computed once, outside the lock.
   Thread* self = Thread::Current();
-  // Try to avoid allocation.
-  ObjPtr<mirror::String> s = LookupStrong(self, utf16_length, utf8_data);
+  ObjPtr<mirror::String> s;
+  {
+    // Try to avoid allocation. If we need to allocate, release the mutex before the allocation.
+    MutexLock mu(self, *Locks::intern_table_lock_);
+    s = strong_interns_.Find(Utf8String(utf16_length, utf8_data, hash));
+  }  // `mu` goes out of scope here, before any allocation below.
   if (s != nullptr) {
     return s;
   }
-  return InternStrong(mirror::String::AllocFromModifiedUtf8(
-      self, utf16_length, utf8_data));
+  bool is_ascii = (utf8_data[utf16_length] == 0);  // NUL at this index: one byte per char.
+  int32_t utf8_length = utf16_length + (LIKELY(is_ascii) ? 0 : strlen(utf8_data + utf16_length));
+  DCHECK_EQ(static_cast<size_t>(utf8_length), strlen(utf8_data));
+  s = mirror::String::AllocFromModifiedUtf8(self, utf16_length, utf8_data, utf8_length);
+  if (UNLIKELY(s == nullptr)) {
+    self->AssertPendingOOMException();
+    return nullptr;  // Allocation failed.
+  }
+  if (kIsDebugBuild) {
+    int32_t string_hash = s->GetHashCode();  // Implicitly sets the hash code.
+    CHECK_EQ(hash, string_hash);  // Debug builds verify the hash computed above.
+  } else {
+    s->SetHashCode(hash);  // Non-debug builds store the hash computed above.
+  }
+  return InternStrong(s);  // Hand off to the overload taking a string object.
 }
 
 ObjPtr<mirror::String> InternTable::InternStrong(const char* utf8_data) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index c5fe797..ba039cc 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -66,6 +66,8 @@
     uint32_t GetUtf16Length() const { return utf16_length_; }
     const char* GetUtf8Data() const { return utf8_data_; }
 
+    static int32_t Hash(uint32_t utf16_length, const char* utf8_data);
+
    private:
     int32_t hash_;
     uint32_t utf16_length_;
@@ -112,7 +114,7 @@
   InternTable();
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  ObjPtr<mirror::String> InternStrong(int32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> InternStrong(uint32_t utf16_length, const char* utf8_data)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6cb560e..66e0151 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -29,6 +29,7 @@
 }  // namespace gc
 
 template<class T> class Handle;
+class InternTable;
 template<class MirrorType> class ObjPtr;
 class StringBuilderAppend;
 struct StringOffsets;
@@ -277,6 +278,7 @@
     uint8_t value_compressed_[0];
   };
 
+  friend class art::InternTable;  // Let `InternTable` call `SetHashCode()`.
   friend class art::StringBuilderAppend;
   friend struct art::StringOffsets;  // for verifying offset information