ARM: Improve String.equals() intrinsic for const strings.
Add tests for the relevant string lengths to 021-string2
and remove obsolete inlining-prevention code from that test.
Also apply a minor fix to the cutoff check for arm64.
This is a follow-up to the ARM64 change
https://android-review.googlesource.com/336648 .
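For context, a minimal standalone sketch (not ART code; FlaggedCount is an
illustrative stand-in for the mirror::String::GetFlaggedCount() helper used in
the diff below) of the count-field encoding that the new const-string fast path
compares against with a single immediate:

    #include <cstdint>

    // With string compression, String.count packs the UTF-16 length in the
    // upper bits and the compression flag in the low bit (0 = compressed,
    // 1 = uncompressed). For a const string both parts are known at compile
    // time, so the length-and-compression check becomes one immediate compare.
    constexpr uint32_t FlaggedCount(uint32_t utf16_length, bool compressed) {
      return (utf16_length << 1) | (compressed ? 0u : 1u);
    }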
aosp_angler-userdebug:
  before:
    arm boot*.oat: 34153044
    arm boot*.oat (no string compression): 34107776
  after:
    arm boot*.oat: 34132740 (-20304)
    arm boot*.oat (no string compression): 34099508 (-8268)
The string compression code size difference drops from
45268 to 33232.
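For reference, a standalone sketch (not part of the patch; names are
illustrative) of the instruction-count arithmetic behind the
kShortConstStringEqualsCutoffInBytes constant added in the diff below: data is
compared in 8-byte LDRD groups plus an optional trailing 4-byte LDR group, so
a 16-byte cutoff keeps the unrolled comparison at no more than 12 instructions.

    #include <cstddef>
    #include <cstdint>

    // Round the number of data bytes up to the 4-byte comparison granularity
    // (strings are zero-padded to kObjectAlignment, so reading padding is safe).
    constexpr size_t RoundUp4(size_t x) { return (x + 3u) & ~size_t{3u}; }

    // Instructions emitted by the unrolled comparison for a const string.
    constexpr size_t UnrolledCompareInsns(uint32_t utf16_length, bool compressed) {
      size_t bytes = RoundUp4(compressed ? utf16_length : utf16_length * 2u);
      size_t insns = 0u;
      while (bytes > 4u) {  // LDRD+LDRD+CMP+BNE+CMP+BNE per 8-byte group.
        insns += 6u;
        bytes -= 8u;
      }
      if (bytes != 0u) {    // LDR+LDR+CMP+BNE for the final 4 bytes.
        insns += 4u;
      }
      return insns;
    }

    static_assert(UnrolledCompareInsns(16u, /* compressed= */ true) == 12u,
                  "16-byte cutoff stays within 12 unrolled instructions");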
Test: m test-art-target on Nexus 6P
Test: m test-art-target on Nexus 6P with string compression disabled.
Bug: 31040547
Change-Id: I4ec73d444d9f56aaf8aa08369c830f398df2168a
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 8b4044d..7c43f2e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1713,6 +1713,22 @@
}
}
+// The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
+// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
+// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
+// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
+// Allow up to 12 instructions (32 bytes) for the unrolled loop.
+constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
+
+static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
+ if (candidate->IsLoadString()) {
+ HLoadString* load_string = candidate->AsLoadString();
+ const DexFile& dex_file = load_string->GetDexFile();
+ return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
+ }
+ return nullptr;
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -1720,12 +1736,29 @@
InvokeRuntimeCallingConventionARMVIXL calling_convention;
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
+
// Temporary registers to store lengths of strings and for calculations.
// Using instruction cbz requires a low register, so explicitly set a temp to be R0.
locations->AddTemp(LocationFrom(r0));
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ // For the generic implementation and for long const strings we need an extra temporary.
+ // We do not need it for short const strings (up to 4 bytes); see the code generation below.
+ uint32_t const_string_length = 0u;
+ const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
+ if (const_string == nullptr) {
+ const_string = GetConstString(invoke->InputAt(1), &const_string_length);
+ }
+ bool is_compressed =
+ mirror::kUseStringCompression &&
+ const_string != nullptr &&
+ mirror::String::DexFileStringAllASCII(const_string, const_string_length);
+ if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+
+ // TODO: If the String.equals() is used only for an immediately following HIf, we can
+ // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
+ // Then we shall need an extra temporary register instead of the output register.
locations->SetOut(Location::RequiresRegister());
}
@@ -1738,8 +1771,6 @@
vixl32::Register out = OutputRegister(invoke);
vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
- vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
- vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
vixl32::Label loop;
vixl32::Label end;
@@ -1771,53 +1802,110 @@
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
__ Ldr(temp, MemOperand(str, class_offset));
- __ Ldr(temp1, MemOperand(arg, class_offset));
- __ Cmp(temp, temp1);
+ __ Ldr(out, MemOperand(arg, class_offset));
+ __ Cmp(temp, out);
__ B(ne, &return_false, /* far_target */ false);
}
- // Load `count` fields of this and argument strings.
- __ Ldr(temp, MemOperand(str, count_offset));
- __ Ldr(temp1, MemOperand(arg, count_offset));
- // Check if `count` fields are equal, return false if they're not.
- // Also compares the compression style, if differs return false.
- __ Cmp(temp, temp1);
- __ B(ne, &return_false, /* far_target */ false);
- // Return true if both strings are empty. Even with string compression `count == 0` means empty.
- static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
- "Expecting 0=compressed, 1=uncompressed");
- __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
+ // Check if one of the inputs is a const string. Do not special-case both strings
+ // being const; such cases should be handled by constant folding if needed.
+ uint32_t const_string_length = 0u;
+ const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
+ if (const_string == nullptr) {
+ const_string = GetConstString(invoke->InputAt(1), &const_string_length);
+ if (const_string != nullptr) {
+ std::swap(str, arg); // Make sure the const string is in `str`.
+ }
+ }
+ bool is_compressed =
+ mirror::kUseStringCompression &&
+ const_string != nullptr &&
+ mirror::String::DexFileStringAllASCII(const_string, const_string_length);
+
+ if (const_string != nullptr) {
+ // Load `count` field of the argument string and check if it matches the const string.
+ // This also compares the compression style; if it differs, return false.
+ __ Ldr(temp, MemOperand(arg, count_offset));
+ __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
+ __ B(ne, &return_false, /* far_target */ false);
+ } else {
+ // Load `count` fields of this and argument strings.
+ __ Ldr(temp, MemOperand(str, count_offset));
+ __ Ldr(out, MemOperand(arg, count_offset));
+ // Check if `count` fields are equal, return false if they're not.
+ // This also compares the compression style; if it differs, return false.
+ __ Cmp(temp, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ }
// Assertions that must hold in order to compare strings 4 bytes at a time.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
- if (mirror::kUseStringCompression) {
- // For string compression, calculate the number of bytes to compare (not chars).
- // This could in theory exceed INT32_MAX, so treat temp as unsigned.
- __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cs); // If uncompressed,
- __ add(cs, temp, temp, temp); // double the byte count.
+ if (const_string != nullptr &&
+ const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
+ : kShortConstStringEqualsCutoffInBytes / 2u)) {
+ // Load and compare the contents. Though we know the contents of the short const string
+ // at compile time, materializing the constants may take more code than loading from memory.
+ int32_t offset = value_offset;
+ size_t remaining_bytes =
+ RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
+ while (remaining_bytes > sizeof(uint32_t)) {
+ vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+ UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
+ vixl32::Register temp2 = scratch_scope.Acquire();
+ __ Ldrd(temp, temp1, MemOperand(str, offset));
+ __ Ldrd(temp2, out, MemOperand(arg, offset));
+ __ Cmp(temp, temp2);
+ __ B(ne, &return_false, /* far_target */ false);
+ __ Cmp(temp1, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ offset += 2u * sizeof(uint32_t);
+ remaining_bytes -= 2u * sizeof(uint32_t);
+ }
+ if (remaining_bytes != 0u) {
+ __ Ldr(temp, MemOperand(str, offset));
+ __ Ldr(out, MemOperand(arg, offset));
+ __ Cmp(temp, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ }
+ } else {
+ // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
+
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+ __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
+ ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cs); // If uncompressed,
+ __ add(cs, temp, temp, temp); // double the byte count.
+ }
+
+ vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+ UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
+ vixl32::Register temp2 = scratch_scope.Acquire();
+
+ // Store offset of string value in preparation for comparison loop.
+ __ Mov(temp1, value_offset);
+
+ // Loop to compare strings 4 bytes at a time starting at the front of the string.
+ __ Bind(&loop);
+ __ Ldr(out, MemOperand(str, temp1));
+ __ Ldr(temp2, MemOperand(arg, temp1));
+ __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
+ __ Cmp(out, temp2);
+ __ B(ne, &return_false, /* far_target */ false);
+ // With string compression, we have compared 4 bytes, otherwise 2 chars.
+ __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
+ __ B(hi, &loop, /* far_target */ false);
}
- // Store offset of string value in preparation for comparison loop.
- __ Mov(temp1, value_offset);
-
- // Loop to compare strings 4 bytes at a time starting at the front of the string.
- // Ok to do this because strings are zero-padded to kObjectAlignment.
- __ Bind(&loop);
- __ Ldr(out, MemOperand(str, temp1));
- __ Ldr(temp2, MemOperand(arg, temp1));
- __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
- __ Cmp(out, temp2);
- __ B(ne, &return_false, /* far_target */ false);
- // With string compression, we have compared 4 bytes, otherwise 2 chars.
- __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
- __ B(hi, &loop, /* far_target */ false);
-
// Return true and exit the function.
// If loop does not result in returning false, we return true.
__ Bind(&return_true);