Change string compression encoding.
Encode the string compression flag as the least significant
bit of the "count" field, with 0 meaning compressed and 1
meaning uncompressed.
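For illustration, a minimal C++ sketch of the new encoding (the
helper names here are hypothetical, not the actual ART API):

    // Flag in bit 0 of the 32-bit `count` field.
    constexpr uint32_t kFlagCompressed = 0u;    // 8-bit (compressed) data.
    constexpr uint32_t kFlagUncompressed = 1u;  // 16-bit (uncompressed) data.

    inline uint32_t EncodeCount(uint32_t length, bool compressed) {
      return (length << 1u) | (compressed ? kFlagCompressed : kFlagUncompressed);
    }
    inline uint32_t DecodeLength(uint32_t count) { return count >> 1u; }
    inline bool IsCompressed(uint32_t count) {
      return (count & 1u) == kFlagCompressed;
    }

This is why the generated code below can extract the length with a
single LSR and move the flag into the condition flags with LSRS,
LSLS or TST, instead of masking out bit 31 with BIC as before.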
The main vdex file is a tiny bit larger (+28B for the prebuilt
boot images, +32B for on-device built images) and the oat
file sizes change. Measured on Nexus 9, AOSP ToT, the
changes are insignificant when string compression is
disabled (-200B for the 32-bit boot*.oat of the prebuilt boot
image, -4KiB when built on the device, attributable to
rounding; -16B for the 64-bit boot*.oat of the prebuilt boot
image, no change when built on device), but with string
compression enabled we see significant differences:
prebuilt multi-part boot image:
- 32-bit boot*.oat: -28KiB
- 64-bit boot*.oat: -24KiB
on-device built single boot image:
- 32-bit boot.oat: -32KiB
- 64-bit boot.oat: -28KiB
The boot image oat file overhead for string compression:
prebuilt multi-part boot image:
- 32-bit boot*.oat: before: ~80KiB after: ~52KiB
- 64-bit boot*.oat: before: ~116KiB after: ~92KiB
on-device built single boot image:
- 32-bit boot.oat: before: 92KiB after: 60KiB
- 64-bit boot.oat: before: 116KiB after: 92KiB
The differences in the SplitStringBenchmark seem to be lost
in the noise.
Test: Run ART test suite on host and Nexus 9 with Optimizing.
Test: Run ART test suite on host and Nexus 9 with interpreter.
Test: All of the above with string compression enabled.
Bug: 31040547
Change-Id: I7570c2b700f1a31004a2d3c18b1cc30046d35a74
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 6ff0ca4..1450b5a 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1120,7 +1120,6 @@
// Need temporary registers for String compression's feature.
if (mirror::kUseStringCompression) {
locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
@@ -1136,10 +1135,9 @@
vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
- vixl32::Register temp3, temp4;
+ vixl32::Register temp3;
if (mirror::kUseStringCompression) {
temp3 = RegisterFrom(locations->GetTemp(3));
- temp4 = RegisterFrom(locations->GetTemp(4));
}
vixl32::Label loop;
@@ -1167,23 +1165,20 @@
__ Subs(out, str, arg);
__ B(eq, &end);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- vixl32::Register temp_reg = temps.Acquire();
-
if (mirror::kUseStringCompression) {
- // Load lengths of this and argument strings.
+ // Load `count` fields of this and argument strings.
__ Ldr(temp3, MemOperand(str, count_offset));
- __ Ldr(temp4, MemOperand(arg, count_offset));
- // Clean out compression flag from lengths.
- __ Bic(temp0, temp3, 0x80000000);
- __ Bic(temp_reg, temp4, 0x80000000);
+ __ Ldr(temp2, MemOperand(arg, count_offset));
+ // Extract lengths from the `count` fields.
+ __ Lsr(temp0, temp3, 1u);
+ __ Lsr(temp1, temp2, 1u);
} else {
// Load lengths of this and argument strings.
__ Ldr(temp0, MemOperand(str, count_offset));
- __ Ldr(temp_reg, MemOperand(arg, count_offset));
+ __ Ldr(temp1, MemOperand(arg, count_offset));
}
// out = length diff.
- __ Subs(out, temp0, temp_reg);
+ __ Subs(out, temp0, temp1);
// temp0 = min(len(str), len(arg)).
{
@@ -1192,33 +1187,32 @@
CodeBufferCheckScope::kMaximumSize);
__ it(gt);
- __ mov(gt, temp0, temp_reg);
+ __ mov(gt, temp0, temp1);
}
- temps.Release(temp_reg);
// Shorter string is empty?
__ Cbz(temp0, &end);
if (mirror::kUseStringCompression) {
// Check if both strings using same compression style to use this comparison loop.
- __ Eors(temp3, temp3, temp4);
- __ B(mi, &different_compression);
- }
- // Store offset of string value in preparation for comparison loop.
- __ Mov(temp1, value_offset);
- if (mirror::kUseStringCompression) {
+ __ Eors(temp2, temp2, temp3);
+ __ Lsrs(temp2, temp2, 1u);
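+ // The carry flag now holds the XOR of the two compression flags (set iff the styles differ).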
+ __ B(cs, &different_compression);
// For string compression, calculate the number of bytes to compare (not chars).
// This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
- __ Cmp(temp4, 0);
+ __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
2 * kMaxInstructionSizeInBytes,
CodeBufferCheckScope::kMaximumSize);
- __ it(ge);
- __ add(ge, temp0, temp0, temp0);
+ __ it(ne);
+ __ add(ne, temp0, temp0, temp0);
}
+ // Store offset of string value in preparation for comparison loop.
+ __ Mov(temp1, value_offset);
+
// Assertions that must hold in order to compare multiple characters at a time.
CHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment),
@@ -1227,10 +1221,12 @@
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+
vixl32::Label find_char_diff_2nd_cmp;
// Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
__ Bind(&loop);
- temp_reg = temps.Acquire();
+ vixl32::Register temp_reg = temps.Acquire();
__ Ldr(temp_reg, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
__ Cmp(temp_reg, temp2);
@@ -1279,72 +1275,92 @@
// The comparison is unsigned for string compression, otherwise signed.
__ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
__ B((mirror::kUseStringCompression ? ls : le), &end);
+
// Extract the characters and calculate the difference.
- vixl32::Label uncompressed_string, continue_process;
if (mirror::kUseStringCompression) {
- __ Cmp(temp4, 0);
- __ B(ge, &uncompressed_string);
- __ Bic(temp1, temp1, 0x7);
- __ B(&continue_process);
+ // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
+ // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
+ // The compression flag is now in the highest bit of temp3, so let's play some tricks.
+ __ orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7f800000u
+ __ bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
+ __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
+ __ Lsr(temp2, temp2, temp1); // Extract second character.
+ __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
+ __ Lsr(out, temp_reg, temp1); // Extract first character.
+ __ and_(temp2, temp2, temp3);
+ __ and_(out, out, temp3);
+ } else {
+ __ bic(temp1, temp1, 0xf);
+ __ Lsr(temp2, temp2, temp1);
+ __ Lsr(out, temp_reg, temp1);
+ __ movt(temp2, 0);
+ __ movt(out, 0);
}
- __ Bind(&uncompressed_string);
- __ Bic(temp1, temp1, 0xf);
- __ Bind(&continue_process);
- __ Lsr(temp2, temp2, temp1);
- __ Lsr(temp_reg, temp_reg, temp1);
- vixl32::Label calculate_difference, uncompressed_string_extract_chars;
- if (mirror::kUseStringCompression) {
- __ Cmp(temp4, 0);
- __ B(ge, &uncompressed_string_extract_chars);
- __ Ubfx(temp2, temp2, 0, 8);
- __ Ubfx(temp_reg, temp_reg, 0, 8);
- __ B(&calculate_difference);
- }
- __ Bind(&uncompressed_string_extract_chars);
- __ Movt(temp2, 0);
- __ Movt(temp_reg, 0);
- __ Bind(&calculate_difference);
- __ Sub(out, temp_reg, temp2);
+ __ Sub(out, out, temp2);
temps.Release(temp_reg);
- __ B(&end);
if (mirror::kUseStringCompression) {
+ __ B(&end);
+ __ Bind(&different_compression);
+
+ // Comparison for different compression style.
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
- vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff;
- // Comparison for different compression style.
- // This part is when THIS is compressed and ARG is not.
- __ Bind(&different_compression);
- __ Add(temp2, str, value_offset);
- __ Add(temp3, arg, value_offset);
- __ Cmp(temp4, 0);
- __ B(lt, &loop_arg_compressed);
- __ Bind(&loop_this_compressed);
+ // We want to free up temp3, currently holding `str.count`, for comparison.
+ // So, we move its compression flag to the bottom bit of the iteration count
+ // `temp0`, which we then need to treat as unsigned. Start by freeing the bit
+ // with an ADD and continue further down with a LSRS+SBC, which will flip the
+ // meaning of the flag but allow
+ // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+ __ add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
+ // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+ __ mov(temp1, str);
+ __ mov(temp2, arg);
+ __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
+ {
+ AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cs); // Interleave with selection of temp1 and temp2.
+ __ mov(cs, temp1, arg); // Preserves flags.
+ __ mov(cs, temp2, str); // Preserves flags.
+ }
+ __ sbc(temp0, temp0, 0); // Complete the move of the compression flag.
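+ // Here temp0 = 2 * min_length - (str compressed ? 1 : 0), treated as unsigned;
+ // its bottom bit is the inverted `str` compression flag.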
+
+ // Adjust temp1 and temp2 from string pointers to data pointers.
+ __ add(temp1, temp1, value_offset);
+ __ add(temp2, temp2, value_offset);
+
+ vixl32::Label different_compression_loop;
+ vixl32::Label different_compression_diff;
+
+ // Main loop for different compression.
temp_reg = temps.Acquire();
- __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex));
- __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex));
- __ Cmp(temp_reg, temp4);
- __ B(ne, &find_diff);
- __ Subs(temp0, temp0, 1);
- __ B(gt, &loop_this_compressed);
- __ B(&end);
-
- // This part is when THIS is not compressed and ARG is.
- __ Bind(&loop_arg_compressed);
- __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex));
- __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex));
- __ Cmp(temp_reg, temp4);
- __ B(ne, &find_diff);
- __ Subs(temp0, temp0, 1);
- __ B(gt, &loop_arg_compressed);
+ __ Bind(&different_compression_loop);
+ __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
+ __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
+ __ cmp(temp_reg, temp3);
+ __ B(ne, &different_compression_diff);
+ __ Subs(temp0, temp0, 2);
+ __ B(hi, &different_compression_loop);
__ B(&end);
// Calculate the difference.
- __ Bind(&find_diff);
- __ Sub(out, temp_reg, temp4);
+ __ Bind(&different_compression_diff);
+ __ Sub(out, temp_reg, temp3);
temps.Release(temp_reg);
+ // Flip the difference if `arg` is compressed.
+ // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` flag.
+ __ Lsrs(temp0, temp0, 1u);
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+
+ AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cc); // If the carry is clear, `arg` is the compressed string,
+ __ rsb(cc, out, out, 0); // so negate the difference.
}
__ Bind(&end);
@@ -1382,7 +1398,7 @@
vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
- vixl32::Label loop, preloop;
+ vixl32::Label loop;
vixl32::Label end;
vixl32::Label return_true;
vixl32::Label return_false;
@@ -1401,6 +1417,10 @@
__ Cbz(arg, &return_false);
}
+ // Reference equality check, return true if same reference.
+ __ Cmp(str, arg);
+ __ B(eq, &return_true);
+
if (!optimizations.GetArgumentIsString()) {
// Instanceof check for the argument by comparing class fields.
// All string objects must have the same type since String cannot be subclassed.
@@ -1412,48 +1432,47 @@
__ B(ne, &return_false);
}
- // Load lengths of this and argument strings.
+ // Load `count` fields of this and argument strings.
__ Ldr(temp, MemOperand(str, count_offset));
__ Ldr(temp1, MemOperand(arg, count_offset));
- // Check if lengths are equal, return false if they're not.
+ // Check if `count` fields are equal, return false if they're not.
// Also compares the compression style, if differs return false.
__ Cmp(temp, temp1);
__ B(ne, &return_false);
- // Return true if both strings are empty.
- if (mirror::kUseStringCompression) {
- // Length needs to be masked out first because 0 is treated as compressed.
- __ Bic(temp, temp, 0x80000000);
- }
+ // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
__ Cbz(temp, &return_true);
- // Reference equality check, return true if same reference.
- __ Cmp(str, arg);
- __ B(eq, &return_true);
- // Assertions that must hold in order to compare strings 2 characters at a time.
+ // Assertions that must hold in order to compare strings 4 bytes at a time.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
if (mirror::kUseStringCompression) {
- // If not compressed, directly to fast compare. Else do preprocess on length.
- __ Cmp(temp1, 0);
- __ B(gt, &preloop);
- // Mask out compression flag and adjust length for compressed string (8-bit)
- // as if it is a 16-bit data, new_length = (length + 1) / 2.
- __ Add(temp, temp, 1);
- __ Lsr(temp, temp, 1);
- __ Bind(&preloop);
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+ __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
+ AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cs); // If uncompressed,
+ __ add(cs, temp, temp, temp); // double the byte count.
}
- // Loop to compare strings 2 characters at a time starting at the front of the string.
- // Ok to do this because strings with an odd length are zero-padded.
+
+ // Store offset of string value in preparation for comparison loop.
__ Mov(temp1, value_offset);
+
+ // Loop to compare strings 4 bytes at a time starting at the front of the string.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
__ Bind(&loop);
__ Ldr(out, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
+ __ Add(temp1, temp1, sizeof(uint32_t));
__ Cmp(out, temp2);
__ B(ne, &return_false);
- __ Add(temp1, temp1, sizeof(uint32_t));
- __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t));
- __ B(gt, &loop);
+ // With string compression, we have compared 4 bytes, otherwise 2 chars.
+ __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
+ __ B(hi, &loop);
// Return true and exit the function.
// If loop does not result in returning false, we return true.
@@ -2547,9 +2566,9 @@
temp = temps.Acquire();
// String's length.
__ Ldr(temp, MemOperand(srcObj, count_offset));
- __ Cmp(temp, 0);
+ __ Tst(temp, 1); // Check the compression flag (0 in the LSB means compressed).
temps.Release(temp);
- __ B(lt, &compressed_string_preloop);
+ __ B(eq, &compressed_string_preloop);
}
__ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
@@ -2588,9 +2607,10 @@
__ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
temps.Release(temp);
__ B(gt, &remainder);
- __ B(&done);
if (mirror::kUseStringCompression) {
+ __ B(&done);
+
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
// Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.