String Compression for ARM and ARM64

Changes on intrinsics and Code Generation on ARM and ARM64
for string compression feature. Currently the feature is off.

The size of boot.oat and boot.art for ARM before and after the
changes (feature OFF) are still. When the feature ON,
boot.oat increased by 0.60% and boot.art decreased by 9.38%.

Meanwhile for ARM64, size of boot.oat and boot.art before and
after changes (feature OFF) are still. When the feature ON,
boot.oat increased by 0.48% and boot.art decreased by 6.58%.

Turn feature on: runtime/mirror/string.h (kUseStringCompression = true)
runtime/asm_support.h (STRING_COMPRESSION_FEATURE 1)

Test: m -j31 test-art-target
All tests passed both when the mirror::kUseStringCompression
is ON and OFF.

Bug: 31040547
Change-Id: I24e86b99391df33ba27df747779b648c5a820649
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index fd2da10..96a6ecb 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1039,6 +1039,11 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  // Need temporary registers for String compression's feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -1053,10 +1058,16 @@
   Register temp0 = locations->GetTemp(0).AsRegister<Register>();
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+  Register temp3, temp4;
+  if (mirror::kUseStringCompression) {
+    temp3 = locations->GetTemp(3).AsRegister<Register>();
+    temp4 = locations->GetTemp(4).AsRegister<Register>();
+  }
 
   Label loop;
   Label find_char_diff;
   Label end;
+  Label different_compression;
 
   // Get offsets of count and value fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1077,20 +1088,40 @@
   // Reference equality check, return 0 if same reference.
   __ subs(out, str, ShifterOperand(arg));
   __ b(&end, EQ);
-  // Load lengths of this and argument strings.
-  __ ldr(temp2, Address(str, count_offset));
-  __ ldr(temp1, Address(arg, count_offset));
+  if (mirror::kUseStringCompression) {
+    // Load lengths of this and argument strings.
+    __ ldr(temp3, Address(str, count_offset));
+    __ ldr(temp4, Address(arg, count_offset));
+    // Clean out compression flag from lengths.
+    __ bic(temp0, temp3, ShifterOperand(0x80000000));
+    __ bic(IP, temp4, ShifterOperand(0x80000000));
+  } else {
+    // Load lengths of this and argument strings.
+    __ ldr(temp0, Address(str, count_offset));
+    __ ldr(IP, Address(arg, count_offset));
+  }
   // out = length diff.
-  __ subs(out, temp2, ShifterOperand(temp1));
+  __ subs(out, temp0, ShifterOperand(IP));
   // temp0 = min(len(str), len(arg)).
-  __ it(Condition::LT, kItElse);
-  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
-  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  __ it(GT);
+  __ mov(temp0, ShifterOperand(IP), GT);
   // Shorter string is empty?
   __ CompareAndBranchIfZero(temp0, &end);
 
+  if (mirror::kUseStringCompression) {
+    // Check if both strings using same compression style to use this comparison loop.
+    __ eors(temp3, temp3, ShifterOperand(temp4));
+    __ b(&different_compression, MI);
+  }
   // Store offset of string value in preparation for comparison loop.
   __ mov(temp1, ShifterOperand(value_offset));
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ cmp(temp4, ShifterOperand(0));
+    __ it(GE);
+    __ add(temp0, temp0, ShifterOperand(temp0), GE);
+  }
 
   // Assertions that must hold in order to compare multiple characters at a time.
   CHECK_ALIGNED(value_offset, 8);
@@ -1100,6 +1131,7 @@
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   DCHECK_EQ(char_size, 2u);
 
+  Label find_char_diff_2nd_cmp;
   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
   __ Bind(&loop);
   __ ldr(IP, Address(str, temp1));
@@ -1107,43 +1139,113 @@
   __ cmp(IP, ShifterOperand(temp2));
   __ b(&find_char_diff, NE);
   __ add(temp1, temp1, ShifterOperand(char_size * 2));
-  __ sub(temp0, temp0, ShifterOperand(2));
 
   __ ldr(IP, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
   __ cmp(IP, ShifterOperand(temp2));
-  __ b(&find_char_diff, NE);
+  __ b(&find_char_diff_2nd_cmp, NE);
   __ add(temp1, temp1, ShifterOperand(char_size * 2));
-  __ subs(temp0, temp0, ShifterOperand(2));
-
-  __ b(&loop, GT);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
+  __ b(&loop, HI);
   __ b(&end);
 
-  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff_2nd_cmp);
+  if (mirror::kUseStringCompression) {
+    __ subs(temp0, temp0, ShifterOperand(4));  // 4 bytes previously compared.
+    __ b(&end, LS);  // Was the second comparison fully beyond the end?
+  } else {
+    // Without string compression, we can start treating temp0 as signed
+    // and rely on the signed comparison below.
+    __ sub(temp0, temp0, ShifterOperand(2));
+  }
+
+  // Find the single character difference.
   __ Bind(&find_char_diff);
   // Get the bit position of the first character that differs.
   __ eor(temp1, temp2, ShifterOperand(IP));
   __ rbit(temp1, temp1);
   __ clz(temp1, temp1);
 
-  // temp0 = number of 16-bit characters remaining to compare.
-  // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
-  // after the end of the shorter string data).
+  // temp0 = number of characters remaining to compare.
+  // (Without string compression, it could be < 1 if a difference is found by the second CMP
+  // in the comparison loop, and after the end of the shorter string data).
 
-  // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
-  // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+  // Without string compression (temp1 >> 4) = character where difference occurs between the last
+  // two words compared, in the interval [0,1].
+  // (0 for low half-word different, 1 for high half-word different).
+  // With string compression, (temp1 << 3) = byte where the difference occurs,
+  // in the interval [0,3].
 
-  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
-  // return length diff (out).
-  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
-  __ b(&end, LE);
+  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+  // the remaining string data, so just return length diff (out).
+  // The comparison is unsigned for string compression, otherwise signed.
+  __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
+  __ b(&end, mirror::kUseStringCompression ? LS : LE);
   // Extract the characters and calculate the difference.
+  Label uncompressed_string, continue_process;
+  if (mirror::kUseStringCompression) {
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&uncompressed_string, GE);
+    __ bic(temp1, temp1, ShifterOperand(0x7));
+    __ b(&continue_process);
+  }
+  __ Bind(&uncompressed_string);
   __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Bind(&continue_process);
+
   __ Lsr(temp2, temp2, temp1);
   __ Lsr(IP, IP, temp1);
+  Label calculate_difference, uncompressed_string_extract_chars;
+  if (mirror::kUseStringCompression) {
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&uncompressed_string_extract_chars, GE);
+    __ ubfx(temp2, temp2, 0, 8);
+    __ ubfx(IP, IP, 0, 8);
+    __ b(&calculate_difference);
+  }
+  __ Bind(&uncompressed_string_extract_chars);
   __ movt(temp2, 0);
   __ movt(IP, 0);
+  __ Bind(&calculate_difference);
   __ sub(out, IP, ShifterOperand(temp2));
+  __ b(&end);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    Label loop_arg_compressed, loop_this_compressed, find_diff;
+    // Comparison for different compression style.
+    // This part is when THIS is compressed and ARG is not.
+    __ Bind(&different_compression);
+    __ add(temp2, str, ShifterOperand(value_offset));
+    __ add(temp3, arg, ShifterOperand(value_offset));
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&loop_arg_compressed, LT);
+
+    __ Bind(&loop_this_compressed);
+    __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
+    __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp4));
+    __ b(&find_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(1));
+    __ b(&loop_this_compressed, GT);
+    __ b(&end);
+
+    // This part is when THIS is not compressed and ARG is.
+    __ Bind(&loop_arg_compressed);
+    __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
+    __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp4));
+    __ b(&find_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(1));
+    __ b(&loop_arg_compressed, GT);
+    __ b(&end);
+
+    // Calculate the difference.
+    __ Bind(&find_diff);
+    __ sub(out, IP, ShifterOperand(temp4));
+  }
 
   __ Bind(&end);
 
@@ -1180,7 +1282,7 @@
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
 
-  Label loop;
+  Label loop, preloop;
   Label end;
   Label return_true;
   Label return_false;
@@ -1214,11 +1316,15 @@
   __ ldr(temp, Address(str, count_offset));
   __ ldr(temp1, Address(arg, count_offset));
   // Check if lengths are equal, return false if they're not.
+  // Also compares the compression style, if differs return false.
   __ cmp(temp, ShifterOperand(temp1));
   __ b(&return_false, NE);
   // Return true if both strings are empty.
+  if (mirror::kUseStringCompression) {
+    // Length needs to be masked out first because 0 is treated as compressed.
+    __ bic(temp, temp, ShifterOperand(0x80000000));
+  }
   __ cbz(temp, &return_true);
-
   // Reference equality check, return true if same reference.
   __ cmp(str, ShifterOperand(arg));
   __ b(&return_true, EQ);
@@ -1227,10 +1333,19 @@
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
-  __ LoadImmediate(temp1, value_offset);
-
+  if (mirror::kUseStringCompression) {
+    // If not compressed, directly to fast compare. Else do preprocess on length.
+    __ cmp(temp1, ShifterOperand(0));
+    __ b(&preloop, GT);
+    // Mask out compression flag and adjust length for compressed string (8-bit)
+    // as if it is a 16-bit data, new_length = (length + 1) / 2.
+    __ add(temp, temp, ShifterOperand(1));
+    __ Lsr(temp, temp, 1);
+    __ Bind(&preloop);
+  }
   // Loop to compare strings 2 characters at a time starting at the front of the string.
   // Ok to do this because strings with an odd length are zero-padded.
+  __ LoadImmediate(temp1, value_offset);
   __ Bind(&loop);
   __ ldr(out, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
@@ -2330,22 +2445,31 @@
   Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
 
-  // src range to copy.
-  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
-  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
-
+  Label done, compressed_string_loop;
   // dst to be copied.
   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
 
   __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
-
-  // Do the copy.
-  Label loop, remainder, done;
-
   // Early out for valid zero-length retrievals.
   __ b(&done, EQ);
 
+  // src range to copy.
+  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
+  Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    // String's length.
+    __ ldr(IP, Address(srcObj, count_offset));
+    __ cmp(IP, ShifterOperand(0));
+    __ b(&compressed_string_preloop, LT);
+  }
+  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
+
+  // Do the copy.
+  Label loop, remainder;
+
   // Save repairing the value of num_chr on the < 4 character path.
   __ subs(IP, num_chr, ShifterOperand(4));
   __ b(&remainder, LT);
@@ -2374,6 +2498,20 @@
   __ subs(num_chr, num_chr, ShifterOperand(1));
   __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
   __ b(&remainder, GT);
+  __ b(&done);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
+    __ Bind(&compressed_string_preloop);
+    __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
+    __ Bind(&compressed_string_loop);
+    __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
+    __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+    __ subs(num_chr, num_chr, ShifterOperand(1));
+    __ b(&compressed_string_loop, GT);
+  }
 
   __ Bind(&done);
 }