From bda29056665961578f2b97cd6d40daca2058694d Mon Sep 17 00:00:00 2001
From: Olga Mikhaltsova
Date: Mon, 22 Jul 2024 18:16:20 +0000
Subject: riscv64: implement signum{float|double} and copySign{float|double} intrinsics

Performance improvement:
copySign(double) -29%
copySign(float) -33%
signum(double) -20%
signum(float) -22%

Test: testrunner.py --target --64 --ndebug --optimizing
Change-Id: I3b69a4d35a0b37e5debed9f16c3a757a4204387e
---
 compiler/optimizing/intrinsics_riscv64.cc | 98 +++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'compiler/optimizing/intrinsics_riscv64.cc')

diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 9f1ac08a26..eec73ea052 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -5526,6 +5526,104 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke)
   __ Bind(&done);
 }
 
+// Emits code for Math.signum(float|double). The input FP register is also the output register
+// (see the DCHECK below): zero and NaN inputs are left unchanged, and every other input is
+// replaced by 1.0 carrying the sign of the input.
+void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  ScratchRegisterScope srs(assembler);
+  XRegister tmp = srs.AllocateXRegister();
+  FRegister ftmp = srs.AllocateFRegister();
+  Riscv64Label done;
+
+  // Materialize 1.0 in `ftmp` from its bit pattern and classify the input into `tmp`.
+  if (type == DataType::Type::kFloat64) {
+    // 0x3FF0000000000000L = 1.0
+    __ Li(tmp, 0x3FF0000000000000L);
+    __ FMvDX(ftmp, tmp);
+    __ FClassD(tmp, in);
+  } else {
+    // 0x3f800000 = 1.0f
+    __ Li(tmp, 0x3F800000);
+    __ FMvWX(ftmp, tmp);
+    __ FClassS(tmp, in);
+  }
+
+  // For +/-0.0 and NaN the input is already the result, so skip the sign injection.
+  __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
+  __ Bnez(tmp, &done);
+
+  // in = 1.0 with the sign of `in`.
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(in, ftmp, in);
+  } else {
+    __ FSgnjS(in, ftmp, in);
+  }
+
+  __ Bind(&done);
+}
+
+// Math.signum(double) is generated in place: the FP input register is reused as the output.
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+// Math.signum(float) is generated in place: the FP input register is reused as the output.
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
+}
+
+// Emits code for Math.copySign(float|double): a single sign-injection instruction that writes
+// to `out` the magnitude of the first input combined with the sign of the second input.
+void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(out, in0, in1);
+  } else {
+    __ FSgnjS(out, in0, in1);
+  }
+}
+
+// NOTE(review): CreateFPFPToFPCallLocations is defined outside this hunk; its name suggests
+// call-style locations although the code generator emits no call. Confirm this is intended.
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
+}
+
 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
 #undef MARK_UNIMPLEMENTED
-- 
cgit v1.2.3-59-g8ed1b