author     2024-07-22 18:16:20 +0000
committer  2024-08-12 15:17:18 +0000
commit     bda29056665961578f2b97cd6d40daca2058694d (patch)
tree       ff3df2723b2c19ac393dbbdb6137e296e94e2868 /compiler/optimizing
parent     3244be57e9c170696b0a17369bcf3e77f82ee9ec (diff)
riscv64: implement signum{float|double} and copySign{float|double} intrinsics
Performance improvement:
copySign(double) -29%
copySign(float) -33%
signum(double) -20%
signum(float) -22%
Test: testrunner.py --target --64 --ndebug --optimizing
Change-Id: I3b69a4d35a0b37e5debed9f16c3a757a4204387e
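
For context, the new intrinsics compute the standard Java semantics of Math.signum and Math.copySign. A minimal C++ sketch (illustrative only, not ART code) of the values the generated RISC-V sequences produce:

#include <cmath>  // std::copysign, std::isnan

// Math.signum: +-0.0 and NaN pass through unchanged; every other input
// yields 1.0 carrying the input's sign -- the FClass-then-FSgnj sequence
// emitted by GenMathSignum in the patch below.
double signum(double x) {
  if (x == 0.0 || std::isnan(x)) {  // covers +0.0, -0.0, sNaN, qNaN
    return x;
  }
  return std::copysign(1.0, x);     // FSgnjD(in, ftmp /* 1.0 */, in)
}

// Math.copySign reduces to a single sign-injection instruction.
double copy_sign(double magnitude, double sign) {
  return std::copysign(magnitude, sign);  // FSgnjD(out, in0, in1)
}

The float variants are identical modulo type, which is consistent with the single-digit instruction counts behind the improvements listed above.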
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.h     |  4
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |  4
-rw-r--r--  compiler/optimizing/code_generator_x86.h       |  4
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    |  4
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc      | 86
5 files changed, 102 insertions, 0 deletions
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0a984c637b..a64615fc31 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -125,6 +125,10 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi
 Location ARM64ReturnLocation(DataType::Type return_type);
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(IntegerRemainderUnsigned) \
   V(LongRemainderUnsigned) \
   V(StringStringIndexOf) \
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index aac823627d..02f58dc178 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -121,6 +121,10 @@ using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>;
 using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \
   V(UnsafeCASLong) /* High register pressure */ \
   V(SystemArrayCopyChar) \
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 6ce0c506a0..456ce51bbf 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -49,6 +49,10 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(MathRoundDouble) \
   V(FloatIsInfinite) \
   V(DoubleIsInfinite) \
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ddeb33a261..cbb4b17fe5 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -54,6 +54,10 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
 static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(CRC32Update) \
   V(CRC32UpdateBytes) \
   V(CRC32UpdateByteBuffer) \
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 9f1ac08a26..eec73ea052 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -5526,6 +5526,92 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke)
   __ Bind(&done);
 }
 
+void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  ScratchRegisterScope srs(assembler);
+  XRegister tmp = srs.AllocateXRegister();
+  FRegister ftmp = srs.AllocateFRegister();
+  Riscv64Label done;
+
+  if (type == DataType::Type::kFloat64) {
+    // 0x3FF0000000000000L = 1.0
+    __ Li(tmp, 0x3FF0000000000000L);
+    __ FMvDX(ftmp, tmp);
+    __ FClassD(tmp, in);
+  } else {
+    // 0x3f800000 = 1.0f
+    __ Li(tmp, 0x3F800000);
+    __ FMvWX(ftmp, tmp);
+    __ FClassS(tmp, in);
+  }
+
+  __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
+  __ Bnez(tmp, &done);
+
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(in, ftmp, in);
+  } else {
+    __ FSgnjS(in, ftmp, in);
+  }
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
+}
+
+void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(out, in0, in1);
+  } else {
+    __ FSgnjS(out, in0, in1);
+  }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
+}
+
 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
 #undef MARK_UNIMPLEMENTED
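
As a side note, the Andi mask in GenMathSignum relies on the one-hot class mask that FCLASS.S/FCLASS.D write to an integer register. The bit positions below follow the RISC-V unprivileged spec; of the enumerator names, the four used in the patch (kPositiveZero, kNegativeZero, kSignalingNaN, kQuietNaN) come from the diff itself, while the rest are illustrative:

// One-hot result bits of FCLASS.{S,D} per the RISC-V unprivileged spec.
enum FPClassMaskType {
  kNegativeInfinity  = 1 << 0,
  kNegativeNormal    = 1 << 1,
  kNegativeSubnormal = 1 << 2,
  kNegativeZero      = 1 << 3,
  kPositiveZero      = 1 << 4,
  kPositiveSubnormal = 1 << 5,
  kPositiveNormal    = 1 << 6,
  kPositiveInfinity  = 1 << 7,
  kSignalingNaN      = 1 << 8,
  kQuietNaN          = 1 << 9,
};

// kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN == 0x318,
// which fits Andi's 12-bit immediate, so the Bnez skips the sign
// injection exactly for +-0.0 and NaN inputs.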