Change string compression encoding.
Encode the string compression flag as the least significant
bit of the "count" field, with 0 meaning compressed and 1
meaning uncompressed.
The main vdex file is a tiny bit larger (+28B for prebuilt
boot images, +32 for on-device built images) and the oat
file sizes change. Measured on Nexus 9, AOSP ToT, these
changes are insignificant when string compression is
disabled (-200B for the 32-bit boot*.oat for prebuilt boot
image, -4KiB when built on the device attributable to
rounding, -16B for 64-bit boot*.oat for prebuilt boot image,
no change when built on device) but with string compression
enabled we get significant differences:
prebuilt multi-part boot image:
- 32-bit boot*.oat: -28KiB
- 64-bit boot*.oat: -24KiB
on-device built single boot image:
- 32-bit boot.oat: -32KiB
- 64-bit boot.oat: -28KiB
The boot image oat file overhead for string compression:
prebuilt multi-part boot image:
- 32-bit boot*.oat: before: ~80KiB after: ~52KiB
- 64-bit boot*.oat: before: ~116KiB after: ~92KiB
on-device built single boot image:
- 32-bit boot.oat: before: 92KiB after: 60KiB
- 64-bit boot.oat: before: 116KiB after: 92KiB
The differences in the SplitStringBenchmark seem to be lost
in the noise.
Test: Run ART test suite on host and Nexus 9 with Optimizing.
Test: Run ART test suite on host and Nexus 9 with interpreter.
Test: All of the above with string compression enabled.
Bug: 31040547
Change-Id: I7570c2b700f1a31004a2d3c18b1cc30046d35a74
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 0135260..b76050c 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1768,12 +1768,15 @@
.cfi_rel_offset r10, 4
.cfi_rel_offset r11, 8
.cfi_rel_offset lr, 12
+#if (STRING_COMPRESSION_FEATURE)
+ ldr r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#else
ldr r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
add r0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
/* r4 count (with flag) and r3 holds actual length */
- mov r4, r3
- bic r3, #2147483648
+ lsr r3, r4, #1
#endif
/* Clamp start to [0..count] */
cmp r2, #0
@@ -1788,8 +1791,8 @@
/* Build pointer to start of data to compare and pre-bias */
#if (STRING_COMPRESSION_FEATURE)
- cmp r4, #0
- blt .Lstring_indexof_compressed
+ lsrs r4, r4, #1
+ bcc .Lstring_indexof_compressed
#endif
add r0, r0, r2, lsl #1
sub r0, #2
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d806715..74643f7 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2402,12 +2402,15 @@
* w2: Starting offset in string data
*/
ENTRY art_quick_indexof
+#if (STRING_COMPRESSION_FEATURE)
+ ldr w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#else
ldr w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
add x0, x0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
/* w4 holds count (with flag) and w3 holds actual length */
- mov w4, w3
- and w3, w3, #2147483647
+ lsr w3, w4, #1
#endif
/* Clamp start to [0..count] */
cmp w2, #0
@@ -2419,7 +2422,7 @@
mov x5, x0
#if (STRING_COMPRESSION_FEATURE)
- tbnz w4, #31, .Lstring_indexof_compressed
+ tbz w4, #0, .Lstring_indexof_compressed
#endif
/* Build pointer to start of data to compare and pre-bias */
add x0, x0, x2, lsl #1
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 98739d3..0f1efce 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2035,15 +2035,14 @@
lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
#if (STRING_COMPRESSION_FEATURE)
/* Differ cases */
- cmpl LITERAL(0), %edx
- jl .Lstring_compareto_this_is_compressed
- cmpl LITERAL(0), %ebx
- jl .Lstring_compareto_that_is_compressed
+ shrl LITERAL(1), %edx
+ jnc .Lstring_compareto_this_is_compressed
+ shrl LITERAL(1), %ebx
+ jnc .Lstring_compareto_that_is_compressed
jmp .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
- andl LITERAL(0x7FFFFFFF), %edx
- cmpl LITERAL(0), %ebx
- jl .Lstring_compareto_both_compressed
+ shrl LITERAL(1), %ebx
+ jnc .Lstring_compareto_both_compressed
/* If (this->IsCompressed() && that->IsCompressed() == false) */
mov %edx, %eax
subl %ebx, %eax
@@ -2061,7 +2060,6 @@
cmovne %edx, %eax // return eax = *(this_cur_char) - *(that_cur_char)
jmp .Lstring_compareto_return
.Lstring_compareto_that_is_compressed:
- andl LITERAL(0x7FFFFFFF), %ebx
mov %edx, %eax
subl %ebx, %eax
mov %edx, %ecx
@@ -2078,7 +2076,6 @@
cmovne %edx, %eax
jmp .Lstring_compareto_return // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_both_compressed:
- andl LITERAL(0x7FFFFFFF), %ebx
/* Calculate min length and count diff */
mov %edx, %ecx
mov %edx, %eax
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 185e55e..0d0ab93 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2142,15 +2142,14 @@
leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
/* Differ cases */
- cmpl LITERAL(0), %r8d
- jl .Lstring_compareto_this_is_compressed
- cmpl LITERAL(0), %r9d
- jl .Lstring_compareto_that_is_compressed
+ shrl LITERAL(1), %r8d
+ jnc .Lstring_compareto_this_is_compressed
+ shrl LITERAL(1), %r9d
+ jnc .Lstring_compareto_that_is_compressed
jmp .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
- andl LITERAL(0x7FFFFFFF), %r8d
- cmpl LITERAL(0), %r9d
- jl .Lstring_compareto_both_compressed
+ shrl LITERAL(1), %r9d
+ jnc .Lstring_compareto_both_compressed
/* Comparison this (8-bit) and that (16-bit) */
mov %r8d, %eax
subl %r9d, %eax
@@ -2169,7 +2168,6 @@
.Lstring_compareto_keep_length1:
ret
.Lstring_compareto_that_is_compressed:
- andl LITERAL(0x7FFFFFFF), %r9d
movl %r8d, %eax
subl %r9d, %eax
mov %r8d, %ecx
@@ -2187,7 +2185,6 @@
.Lstring_compareto_keep_length2:
ret
.Lstring_compareto_both_compressed:
- andl LITERAL(0x7FFFFFFF), %r9d
/* Calculate min length and count diff */
movl %r8d, %ecx
movl %r8d, %eax
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 6bf7e15..2257fd6 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1097,10 +1097,12 @@
return;
}
DCHECK_GE(start, 0);
- DCHECK_GE(end, string->GetLength());
+ DCHECK_LE(start, end);
+ DCHECK_LE(end, string->GetLength());
StackHandleScope<1> hs(self);
Handle<mirror::CharArray> h_char_array(
hs.NewHandle(shadow_frame->GetVRegReference(arg_offset + 3)->AsCharArray()));
+ DCHECK_GE(index, 0);
DCHECK_LE(index, h_char_array->GetLength());
DCHECK_LE(end - start, h_char_array->GetLength() - index);
string->GetChars(start, end, h_char_array, index);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index e990935..a421c34 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -679,12 +679,8 @@
ASSERT_TRUE(env_->IsInstanceOf(o, c));
// ...whose fields haven't been initialized because
// we didn't call a constructor.
- if (art::mirror::kUseStringCompression) {
- // Zero-length string is compressed, so the length internally will be -(1 << 31).
- ASSERT_EQ(-2147483648, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
- } else {
- ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
- }
+ // Even with string compression empty string has `count == 0`.
+ ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
}
TEST_F(JniInternalTest, GetVersion) {
@@ -895,11 +891,12 @@
// Make sure we can actually use it.
jstring s = env_->NewStringUTF("poop");
if (mirror::kUseStringCompression) {
- // Negative because s is compressed (first bit is 1)
- ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2));
+ ASSERT_EQ(mirror::String::GetFlaggedCount(4, /* compressible */ true),
+ env_->GetIntField(s, fid2));
// Create incompressible string
jstring s_16 = env_->NewStringUTF("\u0444\u0444");
- ASSERT_EQ(2, env_->GetIntField(s_16, fid2));
+ ASSERT_EQ(mirror::String::GetFlaggedCount(2, /* compressible */ false),
+ env_->GetIntField(s_16, fid2));
} else {
ASSERT_EQ(4, env_->GetIntField(s, fid2));
}
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index d94b39f..6870fda 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -106,9 +106,7 @@
string->SetCount(count_);
const uint16_t* const src = src_array_->GetData() + offset_;
const int32_t length = String::GetLengthFromCount(count_);
- bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
- DCHECK(!compressible || kUseStringCompression);
- if (compressible) {
+ if (kUseStringCompression && String::IsCompressed(count_)) {
for (int i = 0; i < length; ++i) {
string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
}
@@ -126,7 +124,8 @@
// Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
class SetStringCountAndValueVisitorFromString {
public:
- SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+ SetStringCountAndValueVisitorFromString(int32_t count,
+ Handle<String> src_string,
int32_t offset) :
count_(count), src_string_(src_string), offset_(offset) {
}
@@ -137,8 +136,7 @@
ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
string->SetCount(count_);
const int32_t length = String::GetLengthFromCount(count_);
- bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
- DCHECK(!compressible || kUseStringCompression);
+ bool compressible = kUseStringCompression && String::IsCompressed(count_);
if (src_string_->IsCompressed()) {
const uint8_t* const src = src_string_->GetValueCompressed() + offset_;
memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t));
@@ -209,8 +207,7 @@
gc::AllocatorType allocator_type,
const PreFenceVisitor& pre_fence_visitor) {
constexpr size_t header_size = sizeof(String);
- const bool compressible = kUseStringCompression &&
- String::GetCompressionFlagFromCount(utf16_length_with_flag);
+ const bool compressible = kUseStringCompression && String::IsCompressed(utf16_length_with_flag);
const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t);
size_t length = String::GetLengthFromCount(utf16_length_with_flag);
static_assert(sizeof(length) <= sizeof(size_t),
@@ -245,7 +242,7 @@
template <bool kIsInstrumented>
inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) {
- const int32_t length_with_flag = String::GetFlaggedCount(0);
+ const int32_t length_with_flag = String::GetFlaggedCount(0, /* compressible */ true);
SetStringCountVisitor visitor(length_with_flag);
return Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
}
@@ -255,10 +252,9 @@
Handle<ByteArray> array, int32_t offset,
int32_t high_byte, gc::AllocatorType allocator_type) {
const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
- const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length)
- && (high_byte == 0);
- const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(byte_length)
- : byte_length;
+ const bool compressible =
+ kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) && (high_byte == 0);
+ const int32_t length_with_flag = String::GetFlaggedCount(byte_length, compressible);
SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8);
String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return string;
@@ -272,7 +268,7 @@
DCHECK_GE(array->GetLength(), count);
const bool compressible = kUseStringCompression &&
String::AllASCII<uint16_t>(array->GetData() + offset, count);
- const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count;
+ const int32_t length_with_flag = String::GetFlaggedCount(count, compressible);
SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset);
String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return new_string;
@@ -284,8 +280,7 @@
const bool compressible = kUseStringCompression &&
((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset,
string_length));
- const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length)
- : string_length;
+ const int32_t length_with_flag = String::GetFlaggedCount(string_length, compressible);
SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset);
String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return new_string;
@@ -311,7 +306,7 @@
template<typename MemoryType>
bool String::AllASCII(const MemoryType* const chars, const int length) {
for (int i = 0; i < length; ++i) {
- if (chars[i] > 0x80) {
+ if (chars[i] >= 0x80) {
return false;
}
}
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 4336aa1..0ab0bd6 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -95,8 +95,7 @@
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
const bool compressible = kUseStringCompression &&
(string->IsCompressed() && string2->IsCompressed());
- const int32_t length_with_flag = compressible ? String::GetFlaggedCount(length + length2)
- : (length + length2);
+ const int32_t length_with_flag = String::GetFlaggedCount(length + length2, compressible);
SetStringCountVisitor visitor(length_with_flag);
ObjPtr<String> new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
@@ -132,8 +131,7 @@
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
const bool compressible = kUseStringCompression &&
String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
- int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
- : utf16_length;
+ int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
SetStringCountVisitor visitor(length_with_flag);
ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
if (UNLIKELY(string == nullptr)) {
@@ -169,8 +167,7 @@
int32_t utf8_length) {
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
- const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
- : utf16_length;
+ const int32_t utf16_length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
SetStringCountVisitor visitor(utf16_length_with_flag);
ObjPtr<String> string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
if (UNLIKELY(string == nullptr)) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6ce75bc..95b6c3e 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -33,6 +33,10 @@
// String Compression
static constexpr bool kUseStringCompression = false;
+enum class StringCompressionFlag : uint32_t {
+ kCompressed = 0u,
+ kUncompressed = 1u
+};
// C++ mirror of java.lang.String
class MANAGED String FINAL : public Object {
@@ -78,7 +82,6 @@
void SetCount(int32_t new_count) REQUIRES_SHARED(Locks::mutator_lock_) {
// Count is invariant so use non-transactional mode. Also disable check as we may run inside
// a transaction.
- DCHECK_LE(0, (new_count & INT32_MAX));
SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
}
@@ -175,7 +178,7 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
bool IsCompressed() REQUIRES_SHARED(Locks::mutator_lock_) {
- return kUseStringCompression && GetCompressionFlagFromCount(GetCount());
+ return kUseStringCompression && IsCompressed(GetCount());
}
bool IsValueNull() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -183,16 +186,27 @@
template<typename MemoryType>
static bool AllASCII(const MemoryType* const chars, const int length);
- ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) {
- return kUseStringCompression && ((count & (1u << 31)) != 0);
+ ALWAYS_INLINE static bool IsCompressed(int32_t count) {
+ return GetCompressionFlagFromCount(count) == StringCompressionFlag::kCompressed;
}
- ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) {
- return kUseStringCompression ? (count & INT32_MAX) : count;
+ ALWAYS_INLINE static StringCompressionFlag GetCompressionFlagFromCount(int32_t count) {
+ return kUseStringCompression
+ ? static_cast<StringCompressionFlag>(static_cast<uint32_t>(count) & 1u)
+ : StringCompressionFlag::kUncompressed;
}
- ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) {
- return kUseStringCompression ? (count | (1u << 31)) : count;
+ ALWAYS_INLINE static int32_t GetLengthFromCount(int32_t count) {
+ return kUseStringCompression ? static_cast<int32_t>(static_cast<uint32_t>(count) >> 1) : count;
+ }
+
+ ALWAYS_INLINE static int32_t GetFlaggedCount(int32_t length, bool compressible) {
+ return kUseStringCompression
+ ? static_cast<int32_t>((static_cast<uint32_t>(length) << 1) |
+ (static_cast<uint32_t>(compressible
+ ? StringCompressionFlag::kCompressed
+ : StringCompressionFlag::kUncompressed)))
+ : length;
}
static Class* GetJavaLangString() REQUIRES_SHARED(Locks::mutator_lock_) {