x86/x86-64: Clean up intrinsic codegen for SSE4.1.
When SSE4.1 is not supported, do not intrinsify these calls at all;
let the normal invoke codegen handle them instead of emitting an
out-of-line call from the intrinsic code generator.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Change-Id: Ic26e65974231bbcb44ad696100e1fa4469165e41
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 6d7462e..2dad2f1 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -63,11 +63,6 @@
return res->Intrinsified();
}
-static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
- InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
- IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
-}
-
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
@@ -347,52 +342,23 @@
GetAssembler()->sqrtsd(out, in);
}
-static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
- MoveArguments(invoke, codegen);
-
- DCHECK(invoke->IsInvokeStaticOrDirect());
- codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
- Location::RegisterLocation(EAX));
-
- // Copy the result back to the expected output.
- Location out = invoke->GetLocations()->Out();
- if (out.IsValid()) {
- DCHECK(out.IsRegister());
- codegen->MoveFromReturnRegister(out, invoke->GetType());
- }
-}
-
static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
HInvoke* invoke,
CodeGeneratorX86* codegen) {
// Do we have instruction support?
- if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
- CreateFPToFPLocations(allocator, invoke);
+ if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
return;
}
- // We have to fall back to a call to the intrinsic.
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- locations->SetOut(Location::FpuRegisterLocation(XMM0));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
+ CreateFPToFPLocations(allocator, invoke);
}
-static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
- HInvoke* invoke,
- X86Assembler* assembler,
- int round_mode) {
+static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
LocationSummary* locations = invoke->GetLocations();
- if (locations->WillCall()) {
- InvokeOutOfLineIntrinsic(codegen, invoke);
- } else {
- XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ roundsd(out, in, Immediate(round_mode));
- }
+ DCHECK(!locations->WillCall());
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
}
void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
@@ -400,7 +366,7 @@
}
void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
- GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+ GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}
void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
@@ -408,7 +374,7 @@
}
void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
- GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+ GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}
void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
@@ -416,44 +382,33 @@
}
void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
- GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+ GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
// Do we have instruction support?
- if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- LocationSummary* locations =
- new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(
- static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- locations->SetInAt(1, Location::RequiresRegister());
- }
- locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
return;
}
- // We have to fall back to a call to the intrinsic.
+ HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(static_or_direct != nullptr);
LocationSummary* locations =
- new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- locations->SetOut(Location::RegisterLocation(EAX));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
+ new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ if (static_or_direct->HasSpecialInput() &&
+ invoke->InputAt(
+ static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
}
void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- if (locations->WillCall()) { // TODO: can we reach this?
- InvokeOutOfLineIntrinsic(codegen_, invoke);
- return;
- }
+ DCHECK(!locations->WillCall());
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();