diff options
author | 2024-11-07 15:15:41 +0000 | |
---|---|---|
committer | 2024-11-12 14:13:01 +0000 | |
commit | 20cdc427d5f5875c2d58f9fde775957ad2e28cfd (patch) | |
tree | 2a4c8b3d71e54273745fe161a82905a6754013ee /compiler | |
parent | 83668f93e29877e8cab86bcd25fba90412981518 (diff) |
Add missing Location::kNoOutputOverlap
This can save some ParallelMove instructions.
For x86 and x86_64, all FPToFP intrinsics can add it.
For RISC-V, MathSqrt can add it, but the intrinsics that call
GenDoubleRound can't. MathMultiplyHigh can also add it.
Test: art/test/testrunner/testrunner.py --host --64 -b --optimizing
Test: LUCI run https://ci.chromium.org/b/8731845964396026257
Change-Id: I28e13caf84cd850566538efbd285c0264ce80a1a
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/intrinsics_riscv64.cc | 11 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 3 |
3 files changed, 10 insertions, 6 deletions
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc index 009fedeb6d..7fc5cfb764 100644 --- a/compiler/optimizing/intrinsics_riscv64.cc +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -19,6 +19,7 @@ #include "code_generator_riscv64.h" #include "intrinsic_objects.h" #include "intrinsics_utils.h" +#include "optimizing/locations.h" #include "well_known_classes.h" namespace art HIDDEN { @@ -160,11 +161,13 @@ static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvok locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { +static void CreateFPToFPLocations(ArenaAllocator* allocator, + HInvoke* invoke, + Location::OutputOverlap overlaps = Location::kOutputOverlap) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), overlaps); } void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -5329,7 +5332,7 @@ void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) { } void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); + CreateFPToFPLocations(allocator_, invoke, Location::kNoOutputOverlap); } void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) { @@ -5452,7 +5455,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) { new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 
} void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index f71689230d..952fb855be 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -310,7 +310,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 5e7c3a9611..85c33d312f 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -37,6 +37,7 @@ #include "mirror/string.h" #include "optimizing/code_generator.h" #include "optimizing/data_type.h" +#include "optimizing/locations.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" @@ -329,7 +330,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) { |