ART: Improve VisitStringGetCharsNoCheck intrinsic for compressed strings, using SIMD
The previous implementation of VisitStringGetCharsNoCheck
copies one character at a time for compressed strings (that
use 8 bits per char).
Instead, use SIMD instructions to copy 8 chars at once
where possible.
On a Pixel 3 phone:
Microbenchmarks for getCharsNoCheck on varying string
lengths show a speedup of up to 80% (big cores) and
70% (little cores) on long strings, and around 30% (big)
and 20% (little) on strings of only 8 characters.
The overhead for strings of < 8 characters is ~3%,
and is immediately amortized for strings of more
than 8 characters.
Dhrystone shows a consistent speedup of around 6% (big)
and 4% (little).
The getCharsNoCheck intrinsic is used by the StringBuilder
append() method, which is used by the String concatenate
operator ('+').
Image size change:
Before:
boot-core-libart.oat: 549040
boot.oat: 3789080
boot-framework.oat: 13356576
After:
boot-core-libart.oat: 549024 (-16B)
boot.oat: 3789144 (+64B)
boot-framework.oat: 13356576 (+ 0B)
Test: test_art_target.sh, test_art_host.sh
Test: 536-checker-intrinsic-optimization
Change-Id: I865e3df6d4725e151ae195a86e02e090dae8dd29
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1fab712..185d487 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1960,7 +1960,8 @@
Register tmp2 = temps.AcquireX();
vixl::aarch64::Label done;
- vixl::aarch64::Label compressed_string_loop;
+ vixl::aarch64::Label compressed_string_vector_loop;
+ vixl::aarch64::Label compressed_string_remainder;
__ Sub(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
__ Cbz(num_chr, &done);
@@ -2013,16 +2014,39 @@
__ B(&done);
if (mirror::kUseStringCompression) {
+ // For compressed strings, acquire a SIMD temporary register.
+ FPRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
DCHECK_EQ(c_char_size, 1u);
__ Bind(&compressed_string_preloop);
__ Add(src_ptr, src_ptr, Operand(srcBegin));
- // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
- __ Bind(&compressed_string_loop);
+
+ // Save repairing the value of num_chr on the < 8 character path.
+ __ Subs(tmp1, num_chr, 8);
+ __ B(lt, &compressed_string_remainder);
+
+ // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
+ __ Mov(num_chr, tmp1);
+
+ // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time.
+ // Uses SIMD instructions.
+ __ Bind(&compressed_string_vector_loop);
+ __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
+ __ Subs(num_chr, num_chr, 8);
+ __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
+ __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
+ __ B(ge, &compressed_string_vector_loop);
+
+ __ Adds(num_chr, num_chr, 8);
+ __ B(eq, &done);
+
+ // Loop for < 8 character case and remainder handling with a compressed src.
+ // Copies 1 character (8-bit) to (16-bit) at a time.
+ __ Bind(&compressed_string_remainder);
__ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
__ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
__ Subs(num_chr, num_chr, Operand(1));
- __ B(gt, &compressed_string_loop);
+ __ B(gt, &compressed_string_remainder);
}
__ Bind(&done);
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index a2bd42f..630d86d 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -17,6 +17,21 @@
import java.lang.reflect.Method;
public class Main {
+ public static String smallString = generateString(100);
+ public static String mediumString = generateString(300);
+ public static String largeString = generateString(2000);
+
+ public static String generateString(int length) {
+ // Generate a string in the ASCII range that will
+ // use string compression.
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < length; i++) {
+ // Generate repeating alphabet.
+ sb.append(Character.valueOf((char)('a' + (i % 26))));
+ }
+ return sb.toString();
+ }
+
public static void assertIntEquals(int expected, int result) {
if (expected != result) {
throw new Error("Expected: " + expected + ", found: " + result);
@@ -35,6 +50,12 @@
}
}
+ public static void assertStringEquals(String expected, String result) {
+ if (!expected.equals(result)) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
public static void assertStringContains(String searchTerm, String result) {
if (result == null || !result.contains(searchTerm)) {
throw new Error("Search term: " + searchTerm + ", not found in: " + result);
@@ -59,6 +80,30 @@
assertCharEquals('c', $opt$noinline$stringCharAt("abc", 2));
assertCharEquals('7', $opt$noinline$stringCharAt("0123456789", 7));
+ // Single character.
+ assertStringEquals("a", stringGetCharsAndBack("a"));
+ // Strings < 8 characters.
+ assertStringEquals("foobar", stringGetCharsAndBack("foobar"));
+ // Strings > 8 characters of various lengths.
+ assertStringEquals(smallString, stringGetCharsAndBack(smallString));
+ assertStringEquals(mediumString, stringGetCharsAndBack(mediumString));
+ assertStringEquals(largeString, stringGetCharsAndBack(largeString));
+
+ // Get only a substring:
+ // Substring < 8 characters.
+ assertStringEquals(smallString.substring(5, 10), stringGetCharsRange(smallString, 5, 10, 0));
+ // Substring > 8 characters.
+ assertStringEquals(smallString.substring(7, 28), stringGetCharsRange(smallString, 7, 28, 0));
+
+ // Get full string with offset in the char array.
+ assertStringEquals(smallString, stringGetCharsAndBackOffset(smallString, 17));
+
+ // Get a substring with an offset in the char array.
+ // Substring < 8 characters.
+ assertStringEquals(smallString.substring(5, 10), stringGetCharsRange(smallString, 5, 10, 17));
+ // Substring > 8 characters.
+ assertStringEquals(smallString.substring(7, 28), stringGetCharsRange(smallString, 7, 28, 17));
+
try {
$opt$noinline$stringCharAt("abc", -1);
throw new Error("Should throw SIOOB.");
@@ -431,4 +476,22 @@
throw new Error(ex);
}
}
+
+ public static String stringGetCharsAndBack(String src) {
+ char[] dst = new char[src.length()];
+ src.getChars(0, src.length(), dst, 0);
+ return new String(dst);
+ }
+
+ public static String stringGetCharsAndBackOffset(String src, int offset) {
+ char[] dst = new char[src.length() + offset];
+ src.getChars(0, src.length(), dst, offset);
+ return new String(dst, offset, src.length());
+ }
+
+ public static String stringGetCharsRange(String src, int srcBegin, int srcEnd, int offset) {
+ char[] dst = new char[srcEnd - srcBegin + offset];
+ src.getChars(srcBegin, srcEnd, dst, offset);
+ return new String(dst, offset, srcEnd - srcBegin);
+ }
}