diff options
author | 2016-07-29 14:46:37 -0700 | |
---|---|---|
committer | 2016-08-23 15:12:26 -0700 | |
commit | 3aaa37bba53d6df0265793de48b4b0b57327e57a (patch) | |
tree | ce795803f0f10003eb3d96d9348da620937675b3 | |
parent | 792c98bb773c8c2390f9cbf774f85be9d9a75332 (diff) |
creating workflow for mirror::String compression
All-ASCII String characters are stored in 8-bit blocks
instead of 16-bit. Compression is not yet enabled, but the whole
workflow is already in the code (changing kUseStringCompression in
string.h to true enables the feature).
Note: The feature currently works with the interpreter only, not with the optimizing compiler.
Test art: m ART_TEST_INTERPRETER=true ART_TEST_OPTIMIZING=false
test-art-host
Also tested with String tests from libcore/:
1. libcore.java.lang.StringTest
2. libcore.java.lang.StringBufferTest
3. libcore.java.lang.StringBuilderTest
4. libcore.java.lang.OldStringTest
5. libcore.java.lang.OldStringBufferTest
Memory improvement is 33%: total String memory of all apps, as a share
of the total memory of all apps, drops from 6.03% to 4.03%, measured on
Angler with Hprof tools.
Bug: 31040547
Change-Id: I9cc92c265ebf1305fc06b5fc33efd83797660cce
-rw-r--r-- | compiler/image_test.cc | 6 | ||||
-rw-r--r-- | runtime/arch/stub_test.cc | 1 | ||||
-rw-r--r-- | runtime/common_runtime_test.h | 6 | ||||
-rw-r--r-- | runtime/debugger.cc | 11 | ||||
-rw-r--r-- | runtime/hprof/hprof.cc | 38 | ||||
-rw-r--r-- | runtime/intern_table.cc | 19 | ||||
-rw-r--r-- | runtime/interpreter/interpreter_common.h | 2 | ||||
-rw-r--r-- | runtime/interpreter/interpreter_goto_table_impl.cc | 3 | ||||
-rw-r--r-- | runtime/interpreter/interpreter_switch_impl.cc | 3 | ||||
-rw-r--r-- | runtime/interpreter/mterp/mterp.cc | 3 | ||||
-rw-r--r-- | runtime/interpreter/unstarted_runtime_test.cc | 19 | ||||
-rw-r--r-- | runtime/jdwp/jdwp_bits.h | 11 | ||||
-rw-r--r-- | runtime/jni_internal.cc | 73 | ||||
-rw-r--r-- | runtime/jni_internal_test.cc | 28 | ||||
-rw-r--r-- | runtime/mirror/object_test.cc | 2 | ||||
-rw-r--r-- | runtime/mirror/string-inl.h | 147 | ||||
-rw-r--r-- | runtime/mirror/string.cc | 155 | ||||
-rw-r--r-- | runtime/mirror/string.h | 66 | ||||
-rw-r--r-- | runtime/native/java_lang_Class.cc | 20 | ||||
-rw-r--r-- | runtime/native/libcore_util_CharsetUtils.cc | 5 | ||||
-rw-r--r-- | runtime/utf.cc | 8 | ||||
-rw-r--r-- | runtime/utf.h | 11 | ||||
-rw-r--r-- | test/020-string/expected.txt | 2 | ||||
-rw-r--r-- | test/020-string/src/Main.java | 9 |
24 files changed, 519 insertions, 129 deletions
diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 91579e9daf..e1ee0d2966 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -188,6 +188,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { } uint64_t image_file_size; + size_t image_size; { std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); ASSERT_TRUE(file.get() != nullptr); @@ -206,6 +207,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ASSERT_TRUE(space->IsMallocSpace()); image_file_size = file->GetLength(); + image_size = image_header.GetImageSize(); } ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr); @@ -255,10 +257,10 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ASSERT_TRUE(image_space != nullptr); if (storage_mode == ImageHeader::kStorageModeUncompressed) { // Uncompressed, image should be smaller than file. - ASSERT_LE(image_space->Size(), image_file_size); + ASSERT_LE(image_size, image_file_size); } else { // Compressed, file should be smaller than image. - ASSERT_LE(image_file_size, image_space->Size()); + ASSERT_LE(image_file_size, image_size); } image_space->VerifyImageAllocations(); diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 80bb51d9b6..10adb3ac05 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1203,6 +1203,7 @@ TEST_F(StubTest, AllocObjectArray) { TEST_F(StubTest, StringCompareTo) { + TEST_DISABLED_FOR_STRING_COMPRESSION(); // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__. 
#if defined(__i386__) || defined(__mips__) || \ (defined(__x86_64__) && !defined(__APPLE__)) diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index f445e52d20..2d16a493c8 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -207,6 +207,12 @@ class CheckJniAbortCatcher { return; \ } +#define TEST_DISABLED_FOR_STRING_COMPRESSION() \ + if (mirror::kUseStringCompression) { \ + printf("WARNING: TEST DISABLED FOR STRING COMPRESSION\n"); \ + return; \ + } + } // namespace art namespace std { diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 2a5198bf01..cbdf3dc636 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -1286,8 +1286,7 @@ JDWP::JdwpError Dbg::CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_ if (c->IsStringClass()) { // Special case for java.lang.String. gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - new_object = mirror::String::Alloc<true>(self, 0, allocator_type, visitor); + new_object = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { new_object = c->AllocObject(self); } @@ -4327,10 +4326,16 @@ void Dbg::DdmSendThreadNotification(Thread* t, uint32_t type) { Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa))); size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0; const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr; + bool is_compressed = (name.Get() != nullptr) ? 
name->IsCompressed() : false; std::vector<uint8_t> bytes; JDWP::Append4BE(bytes, t->GetThreadId()); - JDWP::AppendUtf16BE(bytes, chars, char_count); + if (is_compressed) { + const uint8_t* chars_compressed = name->GetValueCompressed(); + JDWP::AppendUtf16CompressedBE(bytes, chars_compressed, char_count); + } else { + JDWP::AppendUtf16BE(bytes, chars, char_count); + } CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2); Dbg::DdmSendChunk(type, bytes); } diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index 9895395169..4005f054d5 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -223,6 +223,12 @@ class EndianOutput { HandleU1List(values, count); length_ += count; } + void AddU1AsU2List(const uint8_t* values, size_t count) { + HandleU1AsU2List(values, count); + // Array of char from compressed String (8-bit) is added as 16-bit blocks + int ceil_count_to_even = count + ((count & 1) ? 1 : 0); + length_ += ceil_count_to_even * sizeof(uint8_t); + } void AddU2List(const uint16_t* values, size_t count) { HandleU2List(values, count); length_ += count * sizeof(uint16_t); @@ -268,6 +274,9 @@ class EndianOutput { virtual void HandleU1List(const uint8_t* values ATTRIBUTE_UNUSED, size_t count ATTRIBUTE_UNUSED) { } + virtual void HandleU1AsU2List(const uint8_t* values ATTRIBUTE_UNUSED, + size_t count ATTRIBUTE_UNUSED) { + } virtual void HandleU2List(const uint16_t* values ATTRIBUTE_UNUSED, size_t count ATTRIBUTE_UNUSED) { } @@ -308,6 +317,19 @@ class EndianOutputBuffered : public EndianOutput { buffer_.insert(buffer_.end(), values, values + count); } + void HandleU1AsU2List(const uint8_t* values, size_t count) OVERRIDE { + DCHECK_EQ(length_, buffer_.size()); + // All 8-bits are grouped in 2 to make 16-bit block like Java Char + if (count & 1) { + buffer_.push_back(0); + } + for (size_t i = 0; i < count; ++i) { + uint8_t value = *values; + buffer_.push_back(value); + values++; + } + } + void HandleU2List(const uint16_t* values, size_t count) 
OVERRIDE { DCHECK_EQ(length_, buffer_.size()); for (size_t i = 0; i < count; ++i) { @@ -1354,7 +1376,11 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { string_value = reinterpret_cast<mirror::Object*>( reinterpret_cast<uintptr_t>(s) + kObjectAlignment); } else { - string_value = reinterpret_cast<mirror::Object*>(s->GetValue()); + if (s->IsCompressed()) { + string_value = reinterpret_cast<mirror::Object*>(s->GetValueCompressed()); + } else { + string_value = reinterpret_cast<mirror::Object*>(s->GetValue()); + } } __ AddObjectId(string_value); } @@ -1369,12 +1395,18 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { CHECK_EQ(obj->IsString(), string_value != nullptr); if (string_value != nullptr) { mirror::String* s = obj->AsString(); + // Compressed string's (8-bit) length is ceil(length/2) in 16-bit blocks + int length_in_16_bit = (s->IsCompressed()) ? ((s->GetLength() + 1) / 2) : s->GetLength(); __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(string_value); __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); - __ AddU4(s->GetLength()); + __ AddU4(length_in_16_bit); __ AddU1(hprof_basic_char); - __ AddU2List(s->GetValue(), s->GetLength()); + if (s->IsCompressed()) { + __ AddU1AsU2List(s->GetValueCompressed(), s->GetLength()); + } else { + __ AddU2List(s->GetValue(), s->GetLength()); + } } } diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index eceb593e08..1940d67316 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -386,8 +386,23 @@ bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a, if (a_length != b.GetUtf16Length()) { return false; } - const uint16_t* a_value = a_string->GetValue(); - return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0; + if (a_string->IsCompressed()) { + size_t b_byte_count = strlen(b.GetUtf8Data()); + size_t b_utf8_length = 
CountModifiedUtf8Chars(b.GetUtf8Data(), b_byte_count); + // Modified UTF-8 single byte character range is 0x01 .. 0x7f + // The string compression occurs on regular ASCII with same exact range, + // not on extended ASCII which up to 0xff + const bool is_b_regular_ascii = (b_byte_count == b_utf8_length); + if (is_b_regular_ascii) { + return memcmp(b.GetUtf8Data(), + a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0; + } else { + return false; + } + } else { + const uint16_t* a_value = a_string->GetValue(); + return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0; + } } size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) { diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 90c8227443..acdc270b28 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -450,7 +450,7 @@ static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruc oss << StringPrintf(" vreg%u=0x%08X", i, raw_value); if (ref_value != nullptr) { if (ref_value->GetClass()->IsStringClass() && - ref_value->AsString()->GetValue() != nullptr) { + !ref_value->AsString()->IsValueNull()) { oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\""; } else { oss << "/" << PrettyTypeOf(ref_value); diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 43b27781e4..6aba898412 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -530,8 +530,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, 
visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<do_access_check, true>( inst->VRegB_21c(), shadow_frame.GetMethod(), self, diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index a6349fcf88..582fc9b438 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -477,8 +477,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<do_access_check, true>( inst->VRegB_21c(), shadow_frame.GetMethod(), self, diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index c25cd78309..20a0753dd9 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -358,8 +358,7 @@ extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<false, true>( inst->VRegB_21c(), shadow_frame->GetMethod(), self, diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc index 7e1f7950eb..c3246008a1 100644 --- a/runtime/interpreter/unstarted_runtime_test.cc +++ b/runtime/interpreter/unstarted_runtime_test.cc @@ -401,8 +401,23 @@ 
TEST_F(UnstartedRuntimeTest, StringInit) { interpreter::DoCall<false, false>(method, self, *shadow_frame, inst, inst_data[0], &result); mirror::String* string_result = reinterpret_cast<mirror::String*>(result.GetL()); EXPECT_EQ(string_arg->GetLength(), string_result->GetLength()); - EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(), - string_arg->GetLength() * sizeof(uint16_t)), 0); + + if (string_arg->IsCompressed() && string_result->IsCompressed()) { + EXPECT_EQ(memcmp(string_arg->GetValueCompressed(), string_result->GetValueCompressed(), + string_arg->GetLength() * sizeof(uint8_t)), 0); + } else if (!string_arg->IsCompressed() && !string_result->IsCompressed()) { + EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(), + string_arg->GetLength() * sizeof(uint16_t)), 0); + } else { + bool equal = true; + for (int i = 0; i < string_arg->GetLength(); ++i) { + if (string_arg->CharAt(i) != string_result->CharAt(i)) { + equal = false; + break; + } + } + EXPECT_EQ(equal, true); + } ShadowFrame::DeleteDeoptimizedFrame(shadow_frame); } diff --git a/runtime/jdwp/jdwp_bits.h b/runtime/jdwp/jdwp_bits.h index f9cf9ca0d9..33b98f3efe 100644 --- a/runtime/jdwp/jdwp_bits.h +++ b/runtime/jdwp/jdwp_bits.h @@ -59,13 +59,22 @@ static inline void Append8BE(std::vector<uint8_t>& bytes, uint64_t value) { bytes.push_back(static_cast<uint8_t>(value)); } -static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, size_t char_count) { +static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, + size_t char_count) { Append4BE(bytes, char_count); for (size_t i = 0; i < char_count; ++i) { Append2BE(bytes, chars[i]); } } +static inline void AppendUtf16CompressedBE(std::vector<uint8_t>& bytes, + const uint8_t* chars, size_t char_count) { + Append4BE(bytes, char_count); + for (size_t i = 0; i < char_count; ++i) { + Append2BE(bytes, static_cast<uint16_t>(chars[i])); + } +} + // @deprecated static inline void 
Set1(uint8_t* buf, uint8_t val) { *buf = val; diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index c3224757d8..7bcadd8b78 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -592,9 +592,8 @@ class JNI { } if (c->IsStringClass()) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - return soa.AddLocalReference<jobject>(mirror::String::Alloc<true>(soa.Self(), 0, - allocator_type, visitor)); + return soa.AddLocalReference<jobject>(mirror::String::AllocEmptyString<true>(soa.Self(), + allocator_type)); } return soa.AddLocalReference<jobject>(c->AllocObject(soa.Self())); } @@ -1673,8 +1672,14 @@ class JNI { ThrowSIOOBE(soa, start, length, s->GetLength()); } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); - const jchar* chars = s->GetValue(); - memcpy(buf, chars + start, length * sizeof(jchar)); + if (s->IsCompressed()) { + for (int i = 0; i < length; ++i) { + buf[i] = static_cast<jchar>(s->CharAt(start+i)); + } + } else { + const jchar* chars = static_cast<jchar*>(s->GetValue()); + memcpy(buf, chars + start, length * sizeof(jchar)); + } } } @@ -1687,9 +1692,15 @@ class JNI { ThrowSIOOBE(soa, start, length, s->GetLength()); } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); - const jchar* chars = s->GetValue(); - size_t bytes = CountUtf8Bytes(chars + start, length); - ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); + if (s->IsCompressed()) { + for (int i = 0; i < length; ++i) { + buf[i] = s->CharAt(start+i); + } + } else { + const jchar* chars = s->GetValue(); + size_t bytes = CountUtf8Bytes(chars + start, length); + ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); + } } } @@ -1698,9 +1709,16 @@ class JNI { ScopedObjectAccess soa(env); mirror::String* s = soa.Decode<mirror::String*>(java_string); gc::Heap* heap = Runtime::Current()->GetHeap(); - if (heap->IsMovableObject(s)) { + if (heap->IsMovableObject(s) || 
s->IsCompressed()) { jchar* chars = new jchar[s->GetLength()]; - memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength()); + if (s->IsCompressed()) { + int32_t length = s->GetLength(); + for (int i = 0; i < length; ++i) { + chars[i] = s->CharAt(i); + } + } else { + memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength()); + } if (is_copy != nullptr) { *is_copy = JNI_TRUE; } @@ -1716,7 +1734,7 @@ class JNI { CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string); ScopedObjectAccess soa(env); mirror::String* s = soa.Decode<mirror::String*>(java_string); - if (chars != s->GetValue()) { + if (s->IsCompressed() || (s->IsCompressed() == false && chars != s->GetValue())) { delete[] chars; } } @@ -1737,15 +1755,27 @@ class JNI { heap->IncrementDisableThreadFlip(soa.Self()); } } - if (is_copy != nullptr) { - *is_copy = JNI_FALSE; + if (s->IsCompressed()) { + if (is_copy != nullptr) { + *is_copy = JNI_TRUE; + } + int32_t length = s->GetLength(); + jchar* chars = new jchar[length]; + for (int i = 0; i < length; ++i) { + chars[i] = s->CharAt(i); + } + return chars; + } else { + if (is_copy != nullptr) { + *is_copy = JNI_FALSE; + } + return static_cast<jchar*>(s->GetValue()); } - return static_cast<jchar*>(s->GetValue()); } static void ReleaseStringCritical(JNIEnv* env, jstring java_string, - const jchar* chars ATTRIBUTE_UNUSED) { + const jchar* chars) { CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string); ScopedObjectAccess soa(env); gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -1757,6 +1787,9 @@ class JNI { heap->DecrementDisableThreadFlip(soa.Self()); } } + if (s->IsCompressed() || (s->IsCompressed() == false && s->GetValue() != chars)) { + delete[] chars; + } } static const char* GetStringUTFChars(JNIEnv* env, jstring java_string, jboolean* is_copy) { @@ -1771,8 +1804,14 @@ class JNI { size_t byte_count = s->GetUtfLength(); char* bytes = new char[byte_count + 1]; CHECK(bytes != nullptr); // bionic aborts anyway. 
- const uint16_t* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); + if (s->IsCompressed()) { + for (size_t i = 0; i < byte_count; ++i) { + bytes[i] = s->CharAt(i); + } + } else { + const uint16_t* chars = s->GetValue(); + ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); + } bytes[byte_count] = '\0'; return bytes; } diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index 04ba8dfc64..64954743d4 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -880,8 +880,15 @@ TEST_F(JniInternalTest, FromReflectedField_ToReflectedField) { ASSERT_NE(fid2, nullptr); // Make sure we can actually use it. jstring s = env_->NewStringUTF("poop"); - ASSERT_EQ(4, env_->GetIntField(s, fid2)); - + if (mirror::kUseStringCompression) { + // Negative because s is compressed (first bit is 1) + ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2)); + // Create incompressible string + jstring s_16 = env_->NewStringUTF("\u0444\u0444"); + ASSERT_EQ(2, env_->GetIntField(s_16, fid2)); + } else { + ASSERT_EQ(4, env_->GetIntField(s, fid2)); + } // Bad arguments. 
GetFromReflectedField_ToReflectedFieldBadArgumentTest(false); GetFromReflectedField_ToReflectedFieldBadArgumentTest(true); @@ -1632,13 +1639,28 @@ TEST_F(JniInternalTest, GetStringCritical_ReleaseStringCritical) { jboolean is_copy = JNI_TRUE; chars = env_->GetStringCritical(s, &is_copy); - EXPECT_EQ(JNI_FALSE, is_copy); + if (mirror::kUseStringCompression) { + // is_copy has to be JNI_TRUE because "hello" is all-ASCII + EXPECT_EQ(JNI_TRUE, is_copy); + } else { + EXPECT_EQ(JNI_FALSE, is_copy); + } EXPECT_EQ(expected[0], chars[0]); EXPECT_EQ(expected[1], chars[1]); EXPECT_EQ(expected[2], chars[2]); EXPECT_EQ(expected[3], chars[3]); EXPECT_EQ(expected[4], chars[4]); env_->ReleaseStringCritical(s, chars); + + if (mirror::kUseStringCompression) { + // is_copy has to be JNI_FALSE because "\xed\xa0\x81\xed\xb0\x80" is incompressible + jboolean is_copy_16 = JNI_TRUE; + jstring s_16 = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80"); + chars = env_->GetStringCritical(s_16, &is_copy_16); + EXPECT_EQ(2, env_->GetStringLength(s_16)); + EXPECT_EQ(4, env_->GetStringUTFLength(s_16)); + env_->ReleaseStringCritical(s_16, chars); + } } TEST_F(JniInternalTest, GetObjectArrayElement_SetObjectArrayElement) { diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 00342202cb..b35a479914 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -62,7 +62,7 @@ class ObjectTest : public CommonRuntimeTest { Handle<String> string( hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in))); ASSERT_EQ(expected_utf16_length, string->GetLength()); - ASSERT_TRUE(string->GetValue() != nullptr); + ASSERT_EQ(string->IsValueNull(), false); // strlen is necessary because the 1-character string "\x00\x00" is interpreted as "" ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0)); ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) || diff --git a/runtime/mirror/string-inl.h 
b/runtime/mirror/string-inl.h index d3660e5615..bc39ea86f7 100644 --- a/runtime/mirror/string-inl.h +++ b/runtime/mirror/string-inl.h @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef ART_RUNTIME_MIRROR_STRING_INL_H_ #define ART_RUNTIME_MIRROR_STRING_INL_H_ @@ -49,6 +48,7 @@ class SetStringCountVisitor { // Avoid AsString as object is not yet in live bitmap or allocation stack. String* string = down_cast<String*>(obj); string->SetCount(count_); + DCHECK(!string->IsCompressed() || kUseStringCompression); } private: @@ -68,10 +68,19 @@ class SetStringCountAndBytesVisitor { // Avoid AsString as object is not yet in live bitmap or allocation stack. String* string = down_cast<String*>(obj); string->SetCount(count_); - uint16_t* value = string->GetValue(); + DCHECK(!string->IsCompressed() || kUseStringCompression); + int32_t length = String::GetLengthFromCount(count_); const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_; - for (int i = 0; i < count_; i++) { - value[i] = high_byte_ + (src[i] & 0xFF); + if (string->IsCompressed()) { + uint8_t* valueCompressed = string->GetValueCompressed(); + for (int i = 0; i < length; i++) { + valueCompressed[i] = (src[i] & 0xFF); + } + } else { + uint16_t* value = string->GetValue(); + for (int i = 0; i < length; i++) { + value[i] = high_byte_ + (src[i] & 0xFF); + } } } @@ -96,7 +105,16 @@ class SetStringCountAndValueVisitorFromCharArray { String* string = down_cast<String*>(obj); string->SetCount(count_); const uint16_t* const src = src_array_->GetData() + offset_; - memcpy(string->GetValue(), src, count_ * sizeof(uint16_t)); + const int32_t length = String::GetLengthFromCount(count_); + bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_); + DCHECK(!compressible || kUseStringCompression); + if (compressible) { + for (int i = 0; i < length; ++i) { + string->GetValueCompressed()[i] = 
static_cast<uint8_t>(src[i]); + } + } else { + memcpy(string->GetValue(), src, length * sizeof(uint16_t)); + } } private: @@ -118,8 +136,22 @@ class SetStringCountAndValueVisitorFromString { // Avoid AsString as object is not yet in live bitmap or allocation stack. String* string = down_cast<String*>(obj); string->SetCount(count_); - const uint16_t* const src = src_string_->GetValue() + offset_; - memcpy(string->GetValue(), src, count_ * sizeof(uint16_t)); + const int32_t length = String::GetLengthFromCount(count_); + bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_); + DCHECK(!compressible || kUseStringCompression); + if (src_string_->IsCompressed()) { + const uint8_t* const src = src_string_->GetValueCompressed() + offset_; + memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t)); + } else { + const uint16_t* const src = src_string_->GetValue() + offset_; + if (compressible) { + for (int i = 0; i < length; ++i) { + string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]); + } + } else { + memcpy(string->GetValue(), src, length * sizeof(uint16_t)); + } + } } private: @@ -133,17 +165,38 @@ inline String* String::Intern() { } inline uint16_t String::CharAt(int32_t index) { - int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_)); + int32_t count = GetLength(); if (UNLIKELY((index < 0) || (index >= count))) { ThrowStringIndexOutOfBoundsException(index, count); return 0; } - return GetValue()[index]; + if (IsCompressed()) { + return GetValueCompressed()[index]; + } else { + return GetValue()[index]; + } +} + +template <typename MemoryType> +int32_t String::FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) { + const MemoryType* p = chars + start; + const MemoryType* end = chars + GetLength(); + while (p < end) { + if (*p++ == ch) { + return (p - 1) - chars; + } + } + return -1; } template<VerifyObjectFlags kVerifyFlags> inline size_t String::SizeOf() { - size_t size = sizeof(String) + 
(sizeof(uint16_t) * GetLength<kVerifyFlags>()); + size_t size = sizeof(String); + if (IsCompressed()) { + size += (sizeof(uint8_t) * GetLength<kVerifyFlags>()); + } else { + size += (sizeof(uint16_t) * GetLength<kVerifyFlags>()); + } // String.equals() intrinsics assume zero-padding up to kObjectAlignment, // so make sure the zero-padding is actually copied around if GC compaction // chooses to copy only SizeOf() bytes. @@ -152,31 +205,35 @@ inline size_t String::SizeOf() { } template <bool kIsInstrumented, typename PreFenceVisitor> -inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type, +inline String* String::Alloc(Thread* self, int32_t utf16_length_with_flag, + gc::AllocatorType allocator_type, const PreFenceVisitor& pre_fence_visitor) { constexpr size_t header_size = sizeof(String); - static_assert(sizeof(utf16_length) <= sizeof(size_t), + const bool compressible = kUseStringCompression && + String::GetCompressionFlagFromCount(utf16_length_with_flag); + const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t); + size_t length = String::GetLengthFromCount(utf16_length_with_flag); + static_assert(sizeof(length) <= sizeof(size_t), "static_cast<size_t>(utf16_length) must not lose bits."); - size_t length = static_cast<size_t>(utf16_length); - size_t data_size = sizeof(uint16_t) * length; + size_t data_size = block_size * length; size_t size = header_size + data_size; // String.equals() intrinsics assume zero-padding up to kObjectAlignment, // so make sure the allocator clears the padding as well. // http://b/23528461 size_t alloc_size = RoundUp(size, kObjectAlignment); - Class* string_class = GetJavaLangString(); + Class* string_class = GetJavaLangString(); // Check for overflow and throw OutOfMemoryError if this was an unreasonable request. // Do this by comparing with the maximum length that will _not_ cause an overflow. 
- constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t); // Unsigned arithmetic. - constexpr size_t max_alloc_length = overflow_length - 1u; + const size_t overflow_length = (-header_size) / block_size; // Unsigned arithmetic. + const size_t max_alloc_length = overflow_length - 1u; static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment), "kObjectAlignment must be at least as big as Java char alignment"); - constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t)); + const size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / block_size); if (UNLIKELY(length > max_length)) { self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow", PrettyDescriptor(string_class).c_str(), - utf16_length).c_str()); + static_cast<int>(length)).c_str()); return nullptr; } @@ -187,11 +244,22 @@ inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorTy } template <bool kIsInstrumented> +inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) { + SetStringCountVisitor visitor(0); + return Alloc<kIsInstrumented>(self, 0, allocator_type, visitor); +} + +template <bool kIsInstrumented> inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length, Handle<ByteArray> array, int32_t offset, int32_t high_byte, gc::AllocatorType allocator_type) { - SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8); - String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor); + const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset; + const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) + && (high_byte == 0); + const int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(byte_length) + : byte_length; + SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8); + String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return string; } @@ -201,16 +269,24 @@ inline String* String::AllocFromCharArray(Thread* self, int32_t count, gc::AllocatorType allocator_type) { // It is a caller error to have a count less than the actual array's size. DCHECK_GE(array->GetLength(), count); - SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset); - String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor); + const bool compressible = kUseStringCompression && + String::AllASCII<uint16_t>(array->GetData() + offset, count); + const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count; + SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset); + String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return new_string; } template <bool kIsInstrumented> inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string, int32_t offset, gc::AllocatorType allocator_type) { - SetStringCountAndValueVisitorFromString visitor(string_length, string, offset); - String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && + ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset, + string_length)); + const int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(string_length) + : string_length; + SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset); + String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return new_string; } @@ -219,11 +295,28 @@ inline int32_t String::GetHashCode() { if (UNLIKELY(result == 0)) { result = ComputeHashCode(); } - DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0) - << ToModifiedUtf8() << " " << result; + if (kIsDebugBuild) { + if (IsCompressed()) { + DCHECK(result != 0 || ComputeUtf16Hash(GetValueCompressed(), GetLength()) == 0) + << ToModifiedUtf8() << " " << result; + } else { + DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0) + << ToModifiedUtf8() << " " << result; + } + } return result; } +template<typename MemoryType> +bool String::AllASCII(const MemoryType* const chars, const int length) { + for (int i = 0; i < length; ++i) { + if (chars[i] > 0x80) { + return false; + } + } + return true; +} + } // namespace mirror } // namespace art diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc index 33aca0304c..46caa4d73f 100644 --- a/runtime/mirror/string.cc +++ b/runtime/mirror/string.cc @@ -41,15 +41,11 @@ int32_t String::FastIndexOf(int32_t ch, int32_t start) { } else if (start > count) { start = count; } - const uint16_t* chars = GetValue(); - const uint16_t* p = chars + start; - const uint16_t* end = chars + count; - while (p < end) { - if (*p++ == ch) { - return (p - 1) - chars; - } + if (IsCompressed()) { + return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start); + } else { + return FastIndexOf<uint16_t>(GetValue(), ch, start); } - return -1; } void String::SetClass(Class* java_lang_String) { @@ -65,45 +61,91 @@ void String::ResetClass() { } int String::ComputeHashCode() { - const int32_t hash_code = ComputeUtf16Hash(GetValue(), GetLength()); + int32_t hash_code = 0; + if (IsCompressed()) { + hash_code = 
ComputeUtf16Hash(GetValueCompressed(), GetLength()); + } else { + hash_code = ComputeUtf16Hash(GetValue(), GetLength()); + } SetHashCode(hash_code); return hash_code; } int32_t String::GetUtfLength() { - return CountUtf8Bytes(GetValue(), GetLength()); + if (IsCompressed()) { + return GetLength(); + } else { + return CountUtf8Bytes(GetValue(), GetLength()); + } } void String::SetCharAt(int32_t index, uint16_t c) { - DCHECK((index >= 0) && (index < count_)); - GetValue()[index] = c; + DCHECK((index >= 0) && (index < GetLength())); + if (IsCompressed()) { + // TODO: Handle the case where String is compressed and c is non-ASCII + GetValueCompressed()[index] = static_cast<uint8_t>(c); + } else { + GetValue()[index] = c; + } } String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) { int32_t length = string->GetLength(); int32_t length2 = string2->GetLength(); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(length + length2); - String* new_string = Alloc<true>(self, length + length2, allocator_type, visitor); + const bool compressible = kUseStringCompression && (string->IsCompressed() && string2->IsCompressed()); + const int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(length + length2) + : (length + length2); + + SetStringCountVisitor visitor(length_with_flag); + String* new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor); if (UNLIKELY(new_string == nullptr)) { return nullptr; } - uint16_t* new_value = new_string->GetValue(); - memcpy(new_value, string->GetValue(), length * sizeof(uint16_t)); - memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t)); + if (compressible) { + uint8_t* new_value = new_string->GetValueCompressed(); + memcpy(new_value, string->GetValueCompressed(), length * sizeof(uint8_t)); + memcpy(new_value + length, string2->GetValueCompressed(), length2 * sizeof(uint8_t)); + } else { + uint16_t* new_value = new_string->GetValue(); + if (string->IsCompressed()) { + for (int i = 0; i < length; ++i) { + new_value[i] = string->CharAt(i); + } + } else { + memcpy(new_value, string->GetValue(), length * sizeof(uint16_t)); + } + if (string2->IsCompressed()) { + for (int i = 0; i < length2; ++i) { + new_value[i+length] = string2->CharAt(i); + } + } else { + memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t)); + } + } return new_string; } String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) { CHECK(utf16_data_in != nullptr || utf16_length == 0); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(utf16_length); - String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && + String::AllASCII<uint16_t>(utf16_data_in, utf16_length); + int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(utf16_length) + : utf16_length; + SetStringCountVisitor visitor(length_with_flag); + String* string = Alloc<true>(self, length_with_flag, allocator_type, visitor); if (UNLIKELY(string == nullptr)) { return nullptr; } - uint16_t* array = string->GetValue(); - memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t)); + if (compressible) { + for (int i = 0; i < utf16_length; ++i) { + string->GetValueCompressed()[i] = static_cast<uint8_t>(utf16_data_in[i]); + } + } else { + uint16_t* array = string->GetValue(); + memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t)); + } return string; } @@ -121,13 +163,20 @@ String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in, int32_t utf8_length) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(utf16_length); - String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && (utf16_length == utf8_length); + const int32_t utf16_length_with_flag = (compressible) ? 
String::GetFlaggedCount(utf16_length) + : utf16_length; + SetStringCountVisitor visitor(utf16_length_with_flag); + String* string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor); if (UNLIKELY(string == nullptr)) { return nullptr; } - uint16_t* utf16_data_out = string->GetValue(); - ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); + if (compressible) { + memcpy(string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t)); + } else { + uint16_t* utf16_data_out = string->GetValue(); + ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); + } return string; } @@ -219,10 +268,16 @@ bool String::Equals(const StringPiece& modified_utf8) { // Create a modified UTF-8 encoded std::string from a java/lang/String object. std::string String::ToModifiedUtf8() { - const uint16_t* chars = GetValue(); size_t byte_count = GetUtfLength(); std::string result(byte_count, static_cast<char>(0)); - ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); + if (IsCompressed()) { + for (size_t i = 0; i < byte_count; ++i) { + result[i] = static_cast<char>(CharAt(i)); + } + } else { + const uint16_t* chars = GetValue(); + ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); + } return result; } @@ -242,11 +297,24 @@ int32_t String::CompareTo(String* rhs) { int32_t rhsCount = rhs->GetLength(); int32_t countDiff = lhsCount - rhsCount; int32_t minCount = (countDiff < 0) ? 
lhsCount : rhsCount; - const uint16_t* lhsChars = lhs->GetValue(); - const uint16_t* rhsChars = rhs->GetValue(); - int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount); - if (otherRes != 0) { - return otherRes; + if (lhs->IsCompressed() && rhs->IsCompressed()) { + int32_t comparison = memcmp(lhs->GetValueCompressed(), rhs->GetValueCompressed(), minCount * sizeof(uint8_t)); + if (comparison != 0) { + return comparison; + } + } else if (lhs->IsCompressed() || rhs->IsCompressed()) { + for (int32_t i = 0; i < minCount; ++i) { + if (lhs->CharAt(i) != rhs->CharAt(i)) { + return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i)); + } + } + } else { + const uint16_t* lhsChars = lhs->GetValue(); + const uint16_t* rhsChars = rhs->GetValue(); + int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount); + if (otherRes != 0) { + return otherRes; + } } return countDiff; } @@ -260,7 +328,14 @@ CharArray* String::ToCharArray(Thread* self) { Handle<String> string(hs.NewHandle(this)); CharArray* result = CharArray::Alloc(self, GetLength()); if (result != nullptr) { - memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t)); + if (string->IsCompressed()) { + int32_t length = string->GetLength(); + for (int i = 0; i < length; ++i) { + result->GetData()[i] = string->CharAt(i); + } + } else { + memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t)); + } } else { self->AssertPendingOOMException(); } @@ -269,8 +344,18 @@ CharArray* String::ToCharArray(Thread* self) { void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) { uint16_t* data = array->GetData() + index; - uint16_t* value = GetValue() + start; - memcpy(data, value, (end - start) * sizeof(uint16_t)); + if (IsCompressed()) { + for (int i = start; i < end; ++i) { + data[i-start] = CharAt(i); + } + } else { + uint16_t* value = GetValue() + start; + memcpy(data, value, (end - start) * sizeof(uint16_t)); + } +} 
+ +bool String::IsValueNull() { + return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr); } } // namespace mirror diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h index d492ba3162..8695fe84f4 100644 --- a/runtime/mirror/string.h +++ b/runtime/mirror/string.h @@ -31,6 +31,9 @@ class StubTest_ReadBarrierForRoot_Test; namespace mirror { +// String Compression +static constexpr bool kUseStringCompression = false; + // C++ mirror of java.lang.String class MANAGED String FINAL : public Object { public: @@ -54,18 +57,28 @@ class MANAGED String FINAL : public Object { return &value_[0]; } + uint8_t* GetValueCompressed() SHARED_REQUIRES(Locks::mutator_lock_) { + return &value_compressed_[0]; + } + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> size_t SizeOf() SHARED_REQUIRES(Locks::mutator_lock_); + // Taking out the first/uppermost bit because it is not part of actual length value template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> int32_t GetLength() SHARED_REQUIRES(Locks::mutator_lock_) { + return GetLengthFromCount(GetCount<kVerifyFlags>()); + } + + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + int32_t GetCount() SHARED_REQUIRES(Locks::mutator_lock_) { return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(String, count_)); } void SetCount(int32_t new_count) SHARED_REQUIRES(Locks::mutator_lock_) { // Count is invariant so use non-transactional mode. Also disable check as we may run inside // a transaction. 
- DCHECK_LE(0, new_count); + DCHECK_LE(0, (new_count & INT32_MAX)); SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count); } @@ -82,12 +95,6 @@ class MANAGED String FINAL : public Object { String* Intern() SHARED_REQUIRES(Locks::mutator_lock_); - template <bool kIsInstrumented, typename PreFenceVisitor> - ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length, - gc::AllocatorType allocator_type, - const PreFenceVisitor& pre_fence_visitor) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); - template <bool kIsInstrumented> ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length, Handle<ByteArray> array, int32_t offset, @@ -107,6 +114,11 @@ class MANAGED String FINAL : public Object { gc::AllocatorType allocator_type) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + template <bool kIsInstrumented> + ALWAYS_INLINE static String* AllocEmptyString(Thread* self, + gc::AllocatorType allocator_type) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); @@ -149,6 +161,10 @@ class MANAGED String FINAL : public Object { int32_t FastIndexOf(int32_t ch, int32_t start) SHARED_REQUIRES(Locks::mutator_lock_); + template <typename MemoryType> + int32_t FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) + SHARED_REQUIRES(Locks::mutator_lock_); + int32_t CompareTo(String* other) SHARED_REQUIRES(Locks::mutator_lock_); CharArray* ToCharArray(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) @@ -157,6 +173,28 @@ class MANAGED String FINAL : public Object { void GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) SHARED_REQUIRES(Locks::mutator_lock_); + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + bool IsCompressed() 
SHARED_REQUIRES(Locks::mutator_lock_) { + return kUseStringCompression && GetCompressionFlagFromCount(GetCount()); + } + + bool IsValueNull() SHARED_REQUIRES(Locks::mutator_lock_); + + template<typename MemoryType> + static bool AllASCII(const MemoryType* const chars, const int length); + + ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) { + return kUseStringCompression && ((count & (1u << 31)) != 0); + } + + ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) { + return kUseStringCompression ? (count & INT32_MAX) : count; + } + + ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) { + return kUseStringCompression ? (count | (1u << 31)) : count; + } + static Class* GetJavaLangString() SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(!java_lang_String_.IsNull()); return java_lang_String_.Read(); @@ -174,12 +212,24 @@ class MANAGED String FINAL : public Object { SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code); } + template <bool kIsInstrumented, typename PreFenceVisitor> + ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length_with_flag, + gc::AllocatorType allocator_type, + const PreFenceVisitor& pre_fence_visitor) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses". 
+ // First bit (uppermost/leftmost) is taken out for Compressed/Uncompressed flag + // [0] Uncompressed: string uses 16-bit memory | [1] Compressed: 8-bit memory int32_t count_; uint32_t hash_code_; - uint16_t value_[0]; + // Compression of all-ASCII into 8-bit memory leads to usage one of these fields + union { + uint16_t value_[0]; + uint8_t value_compressed_[0]; + }; static GcRoot<Class> java_lang_String_; diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc index 6d5e7c7705..d4e54cfa34 100644 --- a/runtime/native/java_lang_Class.cc +++ b/runtime/native/java_lang_Class.cc @@ -198,12 +198,25 @@ ALWAYS_INLINE static inline ArtField* FindFieldByName( } size_t low = 0; size_t high = fields->size(); - const uint16_t* const data = name->GetValue(); + const bool is_name_compressed = name->IsCompressed(); + const uint16_t* const data = (is_name_compressed) ? nullptr : name->GetValue(); + const uint8_t* const data_compressed = (is_name_compressed) ? name->GetValueCompressed() + : nullptr; const size_t length = name->GetLength(); while (low < high) { auto mid = (low + high) / 2; ArtField& field = fields->At(mid); - int result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length); + int result = 0; + if (is_name_compressed) { + size_t field_length = strlen(field.GetName()); + size_t min_size = (length < field_length) ? length : field_length; + result = memcmp(field.GetName(), data_compressed, min_size); + if (result == 0) { + result = field_length - length; + } + } else { + result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length); + } // Alternate approach, only a few % faster at the cost of more allocations. // int result = field->GetStringName(self, true)->CompareTo(name); if (result < 0) { @@ -636,8 +649,7 @@ static jobject Class_newInstance(JNIEnv* env, jobject javaThis) { // Invoke the string allocator to return an empty string for the string class. 
if (klass->IsStringClass()) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - mirror::Object* obj = mirror::String::Alloc<true>(soa.Self(), 0, allocator_type, visitor); + mirror::Object* obj = mirror::String::AllocEmptyString<true>(soa.Self(), allocator_type); if (UNLIKELY(soa.Self()->IsExceptionPending())) { return nullptr; } else { diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc index 1216824b5a..64d56f6b26 100644 --- a/runtime/native/libcore_util_CharsetUtils.cc +++ b/runtime/native/libcore_util_CharsetUtils.cc @@ -165,10 +165,9 @@ static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, ji return nullptr; } - const jchar* src = &(string->GetValue()[offset]); jbyte* dst = &bytes[0]; - for (int i = length - 1; i >= 0; --i) { - jchar ch = *src++; + for (int i = 0; i < length; ++i) { + jchar ch = string->CharAt(offset + i); if (ch > maxValidChar) { ch = '?'; } diff --git a/runtime/utf.cc b/runtime/utf.cc index 5e9fdf7fc8..7e06482635 100644 --- a/runtime/utf.cc +++ b/runtime/utf.cc @@ -170,14 +170,6 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, } } -int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) { - uint32_t hash = 0; - while (char_count--) { - hash = hash * 31 + *chars++; - } - return static_cast<int32_t>(hash); -} - int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length) { uint32_t hash = 0; while (utf16_length != 0u) { diff --git a/runtime/utf.h b/runtime/utf.h index 27d2fd5f56..7c9c333126 100644 --- a/runtime/utf.h +++ b/runtime/utf.h @@ -82,7 +82,16 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, */ int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count) SHARED_REQUIRES(Locks::mutator_lock_); -int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count); + +template<typename 
MemoryType> +int32_t ComputeUtf16Hash(const MemoryType* chars, size_t char_count) { + uint32_t hash = 0; + while (char_count--) { + hash = hash * 31 + *chars++; + } + return static_cast<int32_t>(hash); +} + int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length); // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a diff --git a/test/020-string/expected.txt b/test/020-string/expected.txt index 76b8929bd7..83a0835718 100644 --- a/test/020-string/expected.txt +++ b/test/020-string/expected.txt @@ -1,6 +1,6 @@ testStr is 'This is a very nice string' This is a very nice string -Compare result is 32 +Compare result is greater than zero Compare unicode: -65302 Got expected exception subStr is 'uick brown fox jumps over the lazy ' diff --git a/test/020-string/src/Main.java b/test/020-string/src/Main.java index 710808255c..ccf94aabcc 100644 --- a/test/020-string/src/Main.java +++ b/test/020-string/src/Main.java @@ -45,7 +45,14 @@ public class Main { if (testStr.length() != testStr2.length()) System.out.println("WARNING: stringTest length mismatch"); - System.out.println("Compare result is " + testStr.compareTo(testStr2)); + int compareResult = testStr.compareTo(testStr2); + if (compareResult > 0) { + System.out.println("Compare result is greater than zero"); + } else if (compareResult == 0) { + System.out.println("Compare result is equal to zero"); + } else { + System.out.println("Compare result is less than zero"); + } // expected: -65302 String s1 = "\u0c6d\u0cb6\u0d00\u0000\u0080\u0080\u0080\u0000\u0002\u0002\u0002\u0000\u00e9\u00e9\u00e9"; |