ART: x86 indexOf intrinsics for the optimizing compiler
Add intrinsics implementations for indexOf in the optimizing
compiler. These are mostly ported from Quick. Add instruction
support to assemblers where necessary.
Change-Id: Ife90ed0245532a5c436a26fe84715dc357f353c8
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1fc5432..b37b6b0 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,8 @@
#include "intrinsics_x86_64.h"
+#include <limits>
+
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
@@ -858,6 +860,157 @@
__ Bind(slow_path->GetExitLabel());
}
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ // The data needs to be in RDI for scasw, so request that the string be placed there anyway.
+ locations->SetInAt(0, Location::RegisterLocation(RDI));
+ // Even if we are looking for a constant char, it still has to be copied into RAX, so just
+ // request that the allocator do that anyway. The constant check can still be done by
+ // inspecting the instruction's input explicitly.
+ // Note: This works because we don't clobber RAX anywhere.
+ locations->SetInAt(1, Location::RegisterLocation(RAX));
+ if (!start_at_zero) {
+ locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
+ }
+ // As we clobber RDI during execution anyway, also use it as the output.
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // repne scasw uses RCX as the counter.
+ locations->AddTemp(Location::RegisterLocation(RCX));
+ // Need another temporary to be able to compute the result.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj.AsRegister(), RDI);
+ DCHECK_EQ(search_value.AsRegister(), RAX);
+ DCHECK_EQ(counter.AsRegister(), RCX);
+ DCHECK_EQ(out.AsRegister(), RDI);
+
+ // Check for code points > 0xFFFF, which scasw cannot handle. Emit a slow-path check when
+ // the value is not known statically, or dispatch to the slow path directly if it is a
+ // constant.
+ SlowPathCodeX86_64* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow path. We could dispatch to it directly, but this case should be
+ // rare, so for simplicity just emit the full slow path and branch to it unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
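+ // Note: the unsigned comparison also sends negative search values to the slow path.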
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of the character data within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check: an empty string can never contain a match.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addq(string_obj, Immediate(value_offset));
+ } else {
+ CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0.
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough.
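+ // Now counter = max(start_index, 0).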
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx, the work counter: it will be string.length - start_index.
+ __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
+ __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in ECX.
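+ // repne scasw compares AX with the word at [RDI], advancing RDI by 2 and decrementing
+ // RCX on each iteration, and stops once a match sets ZF or RCX reaches zero.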
+ __ repne_scasw();
+
+ // Did we find a match?
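+ // If ZF is clear, the scan stopped because the counter was exhausted, not on a match.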
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
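+ // RCX holds the number of chars left after the match (scasw decremented it for the
+ // matching char as well), so the match index is string_length - RCX - 1. This also
+ // holds for a non-zero start, as the counter began at string_length - start_index.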
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
+ __ Bind(&done);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
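+// For reference, the fast path above behaves like the following scalar loop (an
+// illustrative sketch only; the name and signature are not part of this change):
+//
+//   int32_t IndexOfSketch(const uint16_t* chars, int32_t length, uint16_t ch, int32_t from) {
+//     for (int32_t i = std::max(from, 0); i < length; ++i) {
+//       if (chars[i] == ch) return i;
+//     }
+//     return -1;
+//   }
+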
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
@@ -1434,8 +1587,6 @@
}
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)