author     2024-07-22 18:16:20 +0000
committer  2024-08-12 15:17:18 +0000
commit     bda29056665961578f2b97cd6d40daca2058694d (patch)
tree       ff3df2723b2c19ac393dbbdb6137e296e94e2868 /compiler/optimizing
parent     3244be57e9c170696b0a17369bcf3e77f82ee9ec (diff)
riscv64: implement signum{float|double} and copySign{float|double} intrinsics
Performance improvement:
copySign(double) -29%
copySign(float) -33%
signum(double) -20%
signum(float) -22%
Test: testrunner.py --target --64 --ndebug --optimizing
Change-Id: I3b69a4d35a0b37e5debed9f16c3a757a4204387e
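
For context, the new intrinsics compute the standard Java semantics of Math.signum and Math.copySign. A minimal C++ sketch (illustrative only, not ART code) of the values the generated RISC-V sequences produce:

#include <cmath>  // std::copysign, std::isnan

// Math.signum: +-0.0 and NaN pass through unchanged; every other input
// yields 1.0 carrying the input's sign -- the FClass-then-FSgnj sequence
// emitted by GenMathSignum in the patch below.
double signum(double x) {
  if (x == 0.0 || std::isnan(x)) {  // covers +0.0, -0.0, sNaN, qNaN
    return x;
  }
  return std::copysign(1.0, x);     // FSgnjD(in, ftmp /* 1.0 */, in)
}

// Math.copySign reduces to a single sign-injection instruction.
double copy_sign(double magnitude, double sign) {
  return std::copysign(magnitude, sign);  // FSgnjD(out, in0, in1)
}

The float variants are identical modulo type, which is consistent with the single-digit instruction counts behind the improvements listed above.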
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.h     |  4
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |  4
-rw-r--r--  compiler/optimizing/code_generator_x86.h       |  4
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    |  4
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc      | 86
5 files changed, 102 insertions, 0 deletions
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0a984c637b..a64615fc31 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -125,6 +125,10 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi
 Location ARM64ReturnLocation(DataType::Type return_type);
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(IntegerRemainderUnsigned) \
   V(LongRemainderUnsigned) \
   V(StringStringIndexOf) \
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index aac823627d..02f58dc178 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -121,6 +121,10 @@ using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>;
 using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \
   V(UnsafeCASLong) /* High register pressure */ \
   V(SystemArrayCopyChar) \
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 6ce0c506a0..456ce51bbf 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -49,6 +49,10 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(MathRoundDouble) \
   V(FloatIsInfinite) \
   V(DoubleIsInfinite) \
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ddeb33a261..cbb4b17fe5 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -54,6 +54,10 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
 static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
 
 #define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
+  V(MathSignumFloat) \
+  V(MathSignumDouble) \
+  V(MathCopySignFloat) \
+  V(MathCopySignDouble) \
   V(CRC32Update) \
   V(CRC32UpdateBytes) \
   V(CRC32UpdateByteBuffer) \
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 9f1ac08a26..eec73ea052 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -5526,6 +5526,92 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke)
   __ Bind(&done);
 }
 
+void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  ScratchRegisterScope srs(assembler);
+  XRegister tmp = srs.AllocateXRegister();
+  FRegister ftmp = srs.AllocateFRegister();
+  Riscv64Label done;
+
+  if (type == DataType::Type::kFloat64) {
+    // 0x3FF0000000000000L = 1.0
+    __ Li(tmp, 0x3FF0000000000000L);
+    __ FMvDX(ftmp, tmp);
+    __ FClassD(tmp, in);
+  } else {
+    // 0x3f800000 = 1.0f
+    __ Li(tmp, 0x3F800000);
+    __ FMvWX(ftmp, tmp);
+    __ FClassS(tmp, in);
+  }
+
+  __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
+  __ Bnez(tmp, &done);
+
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(in, ftmp, in);
+  } else {
+    __ FSgnjS(in, ftmp, in);
+  }
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
+  GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
+}
+
+void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (type == DataType::Type::kFloat64) {
+    __ FSgnjD(out, in0, in1);
+  } else {
+    __ FSgnjS(out, in0, in1);
+  }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
+  GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
+}
+
 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
 #undef MARK_UNIMPLEMENTED
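
As a side note, the Andi mask in GenMathSignum relies on the one-hot class mask that FCLASS.S/FCLASS.D write to an integer register. The bit positions below follow the RISC-V unprivileged spec; of the enumerator names, the four used in the patch (kPositiveZero, kNegativeZero, kSignalingNaN, kQuietNaN) come from the diff itself, while the rest are illustrative:

// One-hot result bits of FCLASS.{S,D} per the RISC-V unprivileged spec.
enum FPClassMaskType {
  kNegativeInfinity  = 1 << 0,
  kNegativeNormal    = 1 << 1,
  kNegativeSubnormal = 1 << 2,
  kNegativeZero      = 1 << 3,
  kPositiveZero      = 1 << 4,
  kPositiveSubnormal = 1 << 5,
  kPositiveNormal    = 1 << 6,
  kPositiveInfinity  = 1 << 7,
  kSignalingNaN      = 1 << 8,
  kQuietNaN          = 1 << 9,
};

// kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN == 0x318,
// which fits Andi's 12-bit immediate, so the Bnez skips the sign
// injection exactly for +-0.0 and NaN inputs.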