Optimizing String.equals as an intrinsic (x86_64)
The fourth implementation of String.equals. I added an intrinsic
in x86_64 which is similar to the original Java implementation
of String.equals: an instanceof check, null check, length check,
and reference equality check, followed by a loop comparing strings
four characters at a time.
Interesting Benchmarking Values:
Optimizing Compiler on 64-bit Emulator
Intrinsic 1-5 Character Strings: 48 ns
Original 1-5 Character Strings: 56 ns
Intrinsic 1000+ Character Strings: 4009 ns
Original 1000+ Character Strings: 4704 ns
Intrinsic Non-String Argument: 35 ns
Original Non-String Argument: 42 ns
Bug: 21481923
Change-Id: I17d0d2e24a670a898ab1729669d3990403b9a853
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index b4926c2..9ea68ec 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -854,6 +854,97 @@
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* const summary =
+     new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ // Both inputs (indices 0 and 1) may be placed in any register.
+ summary->SetInAt(0, Location::RequiresRegister());
+ summary->SetInAt(1, Location::RequiresRegister());
+
+ // Fixed temporaries for the repe_cmpsq instruction: RCX is temp 0 and RDI is temp 1.
+ summary->AddTemp(Location::RegisterLocation(RCX));
+ summary->AddTemp(Location::RegisterLocation(RDI));
+
+ // The result is pinned to RSI, which repe_cmpsq needs anyway; requested with kOutputOverlap.
+ summary->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
+ X86_64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ // Emits the inline String.equals sequence; RSI ends up holding 1 (equal) or 0 (not equal).
+ CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
+
+ Label end;
+ Label return_true;
+ Label return_false;
+
+ // Byte offsets of the count, value, and class fields within a string object.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+ // Note that the null check on the receiver (input 0) must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // A null argument can never be equal to the receiver: return false.
+ __ testl(arg, arg);
+ __ j(kEqual, &return_false);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ movl(rcx, Address(str, class_offset));
+ __ cmpl(rcx, Address(arg, class_offset));
+ __ j(kNotEqual, &return_false);
+
+ // Reference equality check: the same object is trivially equal, so return true.
+ __ cmpl(str, arg);
+ __ j(kEqual, &return_true);
+
+ // Load the receiver's length into RCX (RCX is reused below as the repeat count).
+ __ movl(rcx, Address(str, count_offset));
+ // Strings of different lengths cannot be equal: return false.
+ __ cmpl(rcx, Address(arg, count_offset));
+ __ j(kNotEqual, &return_false);
+ // Lengths match here, so a zero length means both strings are empty: return true.
+ __ testl(rcx, rcx);
+ __ j(kEqual, &return_true);
+
+ // Point RSI/RDI at the two strings' data for repe_cmpsq; RSI (the output) is first written here.
+ __ leal(rsi, Address(str, value_offset));
+ __ leal(rdi, Address(arg, value_offset));
+
+ // RCX = (length + 3) / 4, i.e. the number of four-character units to compare, rounded up.
+ __ addl(rcx, Immediate(3));
+ __ shrl(rcx, Immediate(2));
+
+ // Preconditions for comparing 8 bytes at a time (presumably the tail of the last unit is padding).
+ DCHECK_ALIGNED(value_offset, 8);
+ static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
+
+ // Compare the strings four characters per step, starting at the beginning, RCX steps at most.
+ __ repe_cmpsq();
+ // If a mismatch was found, the zero flag is clear and we return false.
+ __ j(kNotEqual, &return_false);
+
+ // Return true and exit the function.
+ // Reached by fall-through when the comparison found no mismatch, or by an earlier jump.
+ __ Bind(&return_true);
+ __ movl(rsi, Immediate(1));
+ __ jmp(&end);
+
+ // Return false: zero the output register, then fall through to the end label.
+ __ Bind(&return_false);
+ __ xorl(rsi, rsi);
+ __ Bind(&end);
+}
+
static void CreateStringIndexOfLocations(HInvoke* invoke,
ArenaAllocator* allocator,
bool start_at_zero) {
@@ -1607,7 +1698,6 @@
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
#undef UNIMPLEMENTED_INTRINSIC