summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Santiago Aboy Solanes <solanes@google.com> 2024-11-07 15:15:41 +0000
committer Santiago Aboy Solanes <solanes@google.com> 2024-11-12 14:13:01 +0000
commit 20cdc427d5f5875c2d58f9fde775957ad2e28cfd (patch)
tree 2a4c8b3d71e54273745fe161a82905a6754013ee
parent 83668f93e29877e8cab86bcd25fba90412981518 (diff)
Add missing Location::kNoOutputOverlap
This can save some ParallelMove instructions. For x86(_64) all FPToFP intrinsics can add it. For RISC-V, MathSqrt can add it but the ones that call GenDoubleRound can't.

Also, we can add it for MathMultiplyHigh.

Test: art/test/testrunner/testrunner.py --host --64 -b --optimizing
Test: LUCI run https://ci.chromium.org/b/8731845964396026257
Change-Id: I28e13caf84cd850566538efbd285c0264ce80a1a
-rw-r--r-- compiler/optimizing/intrinsics_riscv64.cc 11
-rw-r--r-- compiler/optimizing/intrinsics_x86.cc 2
-rw-r--r-- compiler/optimizing/intrinsics_x86_64.cc 3
3 files changed, 10 insertions, 6 deletions
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 009fedeb6d..7fc5cfb764 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -19,6 +19,7 @@
#include "code_generator_riscv64.h"
#include "intrinsic_objects.h"
#include "intrinsics_utils.h"
+#include "optimizing/locations.h"
#include "well_known_classes.h"
namespace art HIDDEN {
@@ -160,11 +161,13 @@ static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvok
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+static void CreateFPToFPLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ Location::OutputOverlap overlaps = Location::kOutputOverlap) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), overlaps);
}
void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
@@ -5329,7 +5332,7 @@ void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
+ CreateFPToFPLocations(allocator_, invoke, Location::kNoOutputOverlap);
}
void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) {
@@ -5452,7 +5455,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index f71689230d..952fb855be 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -310,7 +310,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5e7c3a9611..85c33d312f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -37,6 +37,7 @@
#include "mirror/string.h"
#include "optimizing/code_generator.h"
#include "optimizing/data_type.h"
+#include "optimizing/locations.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -329,7 +330,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {