riscv64: float opcodes

Test: Run these opcodes against all interpreter
tests on a Linux RISC-V VM.

(1) setup
  lunch aosp_riscv64-userdebug

  export ART_TEST_SSH_USER=ubuntu
  export ART_TEST_SSH_HOST=localhost
  export ART_TEST_SSH_PORT=10001
  export ART_TEST_ON_VM=true

  . art/tools/buildbot-utils.sh
  art/tools/buildbot-build.sh --target

  # Create, boot and configure the VM.
  art/tools/buildbot-vm.sh create
  art/tools/buildbot-vm.sh boot
  art/tools/buildbot-vm.sh setup-ssh  # password: 'ubuntu'

  art/tools/buildbot-cleanup-device.sh
  art/tools/buildbot-setup-device.sh
  art/tools/buildbot-sync.sh

(2) test
  art/test.py --target -r --no-prebuild --ndebug --64  -j 12 --cdex-none --interpreter

Clean with `m check_cfi` too.
Also exercised on cuttlefish boot. No SIGSEGV or SIGILL noted.

Bug: 283082047

Change-Id: If7fa3163eba49b6e44dfc02425eade01f54003b7
diff --git a/runtime/interpreter/mterp/riscv64/floating_point.S b/runtime/interpreter/mterp/riscv64/floating_point.S
index 5bcfce9..ba64624 100644
--- a/runtime/interpreter/mterp/riscv64/floating_point.S
+++ b/runtime/interpreter/mterp/riscv64/floating_point.S
@@ -1,40 +1,7 @@
 // Note: Floating point operations must follow IEEE 754 rules, using round-to-nearest and gradual
 // underflow, except where stated otherwise.
 
-%def fbinop(instr=""):
-    unimp
-
-%def fbinop2addr(instr=""):
-    unimp
-
-%def fbinopWide(instr=""):
-    unimp
-
-%def fbinopWide2addr(instr=""):
-    unimp
-
-%def funop(instr=""):
-    unimp
-
-%def funopNarrower(instr=""):
-    unimp
-
-%def funopWider(instr=""):
-    unimp
-
-%def op_add_double():
-    unimp
-
-%def op_add_double_2addr():
-    unimp
-
-%def op_add_float():
-    unimp
-
-%def op_add_float_2addr():
-    unimp
-
-%def op_cmpg_double():
+%def op_cmpl_float():
     unimp
 
 %def op_cmpg_float():
@@ -43,89 +10,329 @@
 %def op_cmpl_double():
     unimp
 
-%def op_cmpl_float():
+%def op_cmpg_double():
     unimp
 
-%def op_div_double():
-    unimp
+//
+// funop vA, vB
+// Format 12x: B|A|op
+//
 
-%def op_div_double_2addr():
-    unimp
-
-%def op_div_float():
-    unimp
-
-%def op_div_float_2addr():
-    unimp
-
-%def op_double_to_float():
-    unimp
-
-%def op_double_to_int():
-    unimp
-
-%def op_double_to_long():
-    unimp
-
-%def op_float_to_double():
-    unimp
-
-%def op_float_to_int():
-    unimp
-
-%def op_float_to_long():
-    unimp
-
-%def op_int_to_double():
-    unimp
-
-%def op_int_to_float():
-    unimp
-
-%def op_long_to_double():
-    unimp
-
-%def op_long_to_float():
-    unimp
-
-%def op_mul_double():
-    unimp
-
-%def op_mul_double_2addr():
-    unimp
-
-%def op_mul_float():
-    unimp
-
-%def op_mul_float_2addr():
-    unimp
-
-%def op_neg_double():
-    unimp
-
+// neg-float vA, vB
+// Format 12x: B|A|7f
 %def op_neg_float():
-    unimp
+%   generic_funop(instr="fneg.s ft0, ft0", dst="s", src="s")
 
-%def op_rem_double():
-    unimp
+// neg-double vA, vB
+// Format 12x: B|A|80
+%def op_neg_double():
+%   generic_funop(instr="fneg.d ft0, ft0", dst="d", src="d")
 
-%def op_rem_double_2addr():
-    unimp
+// int-to-float vA, vB
+// Format 12x: B|A|82
+// Note: Conversion of int32 to float, using round-to-nearest. This loses precision for some values.
+// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
+%def op_int_to_float():
+%   generic_funop(instr="fcvt.s.w ft0, t1, rne", dst="s", src="w")
 
-%def op_rem_float():
-    unimp
+// int-to-double vA, vB
+// Format 12x: B|A|83
+// Note: Conversion of int32 to double.
+%def op_int_to_double():
+%   generic_funop(instr="fcvt.d.w ft0, t1", dst="d", src="w")
 
-%def op_rem_float_2addr():
-    unimp
+// long-to-float vA, vB
+// Format 12x: B|A|85
+// Note: Conversion of int64 to float, using round-to-nearest. This loses precision for some values.
+// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
+%def op_long_to_float():
+%   generic_funop(instr="fcvt.s.l ft0, t1, rne", dst="s", src="l")
 
-%def op_sub_double():
-    unimp
+// long-to-double vA, vB
+// Format 12x: B|A|86
+// Note: Conversion of int64 to double, using round-to-nearest. This loses precision for some values.
+// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
+%def op_long_to_double():
+%   generic_funop(instr="fcvt.d.l ft0, t1, rne", dst="d", src="l")
 
-%def op_sub_double_2addr():
-    unimp
+// float-to-int vA, vB
+// Format 12x: B|A|87
+// Note: Conversion of float to int32, using round-toward-zero. NaN and -0.0 (negative zero)
+// convert to the integer 0. Infinities and values with too large a magnitude to be represented
+// get converted to either 0x7fffffff or -0x80000000 depending on sign.
+//
+// FCVT.W.S RTZ has the following behavior:
+// - NaN rounds to 0x7fffffff - requires check and set to zero.
+// - negative zero rounds to zero - matches dex spec.
+// - pos inf rounds to 0x7fffffff - matches dex spec.
+// - neg inf rounds to 0x80000000 - matches dex spec.
+%def op_float_to_int():
+%   generic_funop(instr="fcvt.w.s t1, ft0, rtz", dst="w", src="s", nan_zeroed="1")
 
+// float-to-long vA, vB
+// Format 12x: B|A|88
+// Note: Conversion of float to int64, using round-toward-zero. The same special case rules as for
+// float-to-int apply here, except that out-of-range values get converted to either
+// 0x7fffffffffffffff or -0x8000000000000000 depending on sign.
+//
+// FCVT.L.S RTZ has the following behavior:
+// - NaN rounds to 0x7fffffffffffffff - requires check and set to zero.
+// - negative zero rounds to zero - matches dex spec.
+// - pos inf rounds to 0x7fffffffffffffff - matches dex spec.
+// - neg inf rounds to 0x8000000000000000 - matches dex spec.
+%def op_float_to_long():
+%   generic_funop(instr="fcvt.l.s t1, ft0, rtz", dst="l", src="s", nan_zeroed="1")
+
+// float-to-double vA, vB
+// Format 12x: B|A|89
+// Note: Conversion of float to double, preserving the value exactly.
+%def op_float_to_double():
+%   generic_funop(instr="fcvt.d.s ft0, ft0", dst="d", src="s")
+
+// double-to-int vA, vB
+// Format 12x: B|A|8a
+// Note: Conversion of double to int32, using round-toward-zero. The same special case rules as for
+// float-to-int apply here.
+%def op_double_to_int():
+%   generic_funop(instr="fcvt.w.d t1, ft0, rtz", dst="w", src="d", nan_zeroed="1")
+
+// double-to-long vA, vB
+// Format 12x: B|A|8b
+// Note: Conversion of double to int64, using round-toward-zero. The same special case rules as for
+// float-to-long apply here.
+%def op_double_to_long():
+%   generic_funop(instr="fcvt.l.d t1, ft0, rtz", dst="l", src="d", nan_zeroed="1")
+
+// double-to-float vA, vB
+// Format 12x: B|A|8c
+// Note: Conversion of double to float, using round-to-nearest. This loses precision for some values.
+// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
+%def op_double_to_float():
+%   generic_funop(instr="fcvt.s.d ft0, ft0, rne", dst="s", src="d")
+
+// unop boilerplate
+// instr: operand held in t1 or ft0, result written to t1 or ft0.
+// instr must not clobber t2.
+// dst: one of w (int32), l (int64), s (float), d (double)
+// src: one of w (int32), l (int64), s (float), d (double)
+// Clobbers: ft0, t0, t1, t2
+%def generic_funop(instr="", dst="", src="", nan_zeroed="0"):
+    srliw t0, xINST, 12        // t0 := B
+    srliw t2, xINST, 8         // t2 := B|A
+
+    .ifc $src, w
+      GET_VREG t1, t0          // t1 := fp[B]
+    .endif
+    .ifc $src, l
+      GET_VREG_WIDE t1, t0     // t1 := fp[B]
+    .endif
+    .ifc $src, s
+      GET_VREG_FLOAT ft0, t0   // ft0 := fp[B]
+    .endif
+    .ifc $src, d
+      GET_VREG_DOUBLE ft0, t0  // ft0 := fp[B]
+    .endif
+
+    and t2, t2, 0xF            // t2 := A
+    FETCH_ADVANCE_INST 1       // advance xPC, load xINST
+    .if $nan_zeroed
+      // Okay to clobber t1: with nan_zeroed=1 the source operand is in ft0, so t1 is not an input.
+      .ifc $src, s
+        fclass.s t1, ft0       // t1 := class mask of ft0; bits 8/9 flag signaling/quiet NaN
+      .endif
+      .ifc $src, d
+        fclass.d t1, ft0       // t1 := class mask of ft0; bits 8/9 flag signaling/quiet NaN
+      .endif
+      sltiu t1, t1, 0x100  // t1 := 1 if not NaN (mask < 0x100), t1 := 0 if NaN
+      beqz t1, 1f          // NaN: skip the conversion and store t1 == 0, per dex spec
+    .endif
+    $instr                     // read operand (from t1|ft0), write result (to t1|ft0)
+                               // do not clobber t2!
+1:
+
+    .ifc $dst, w
+      SET_VREG t1, t2          // fp[A] := t1
+    .endif
+    .ifc $dst, l
+      SET_VREG_WIDE t1, t2     // fp[A] := t1
+    .endif
+    .ifc $dst, s
+      SET_VREG_FLOAT ft0, t2   // fp[A] := ft0
+    .endif
+    .ifc $dst, d
+      SET_VREG_DOUBLE ft0, t2  // fp[A] := ft0
+    .endif
+
+    GET_INST_OPCODE t0         // t0 holds next opcode
+    GOTO_OPCODE t0             // continue to next
+
+//
+// fbinop vAA, vBB, vCC
+// Format 23x: AA|op CC|BB
+//
+
+// add-float vAA, vBB, vCC
+// Format 23x: AA|a6 CC|BB
+%def op_add_float():
+%   generic_fbinop(instr="fadd.s fa0, fa0, fa1, rne")
+
+// sub-float vAA, vBB, vCC
+// Format 23x: AA|a7 CC|BB
 %def op_sub_float():
-    unimp
+%   generic_fbinop(instr="fsub.s fa0, fa0, fa1, rne")
 
+// mul-float vAA, vBB, vCC
+// Format 23x: AA|a8 CC|BB
+%def op_mul_float():
+%   generic_fbinop(instr="fmul.s fa0, fa0, fa1, rne")
+
+// div-float vAA, vBB, vCC
+// Format 23x: AA|a9 CC|BB
+%def op_div_float():
+%   generic_fbinop(instr="fdiv.s fa0, fa0, fa1, rne")
+
+// rem-float vAA, vBB, vCC
+// Format 23x: AA|aa CC|BB
+// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
+// and is defined as result == a - roundTowardZero(a / b) * b.
+// Note: RISC-V does not offer floating point remainder; use fmodf in libm.
+%def op_rem_float():
+%   generic_fbinop(instr="call fmodf")
+
+// add-double vAA, vBB, vCC
+// Format 23x: AA|ab CC|BB
+%def op_add_double():
+%   generic_fbinop(instr="fadd.d fa0, fa0, fa1, rne", is_double="1")
+
+// sub-double vAA, vBB, vCC
+// Format 23x: AA|ac CC|BB
+%def op_sub_double():
+%   generic_fbinop(instr="fsub.d fa0, fa0, fa1, rne", is_double="1")
+
+// mul-double vAA, vBB, vCC
+// Format 23x: AA|ad CC|BB
+%def op_mul_double():
+%   generic_fbinop(instr="fmul.d fa0, fa0, fa1, rne", is_double="1")
+
+// div-double vAA, vBB, vCC
+// Format 23x: AA|ae CC|BB
+%def op_div_double():
+%   generic_fbinop(instr="fdiv.d fa0, fa0, fa1, rne", is_double="1")
+
+// rem-double vAA, vBB, vCC
+// Format 23x: AA|af CC|BB
+// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
+// and is defined as result == a - roundTowardZero(a / b) * b.
+// Note: RISC-V does not offer floating point remainder; use fmod in libm.
+%def op_rem_double():
+%   generic_fbinop(instr="call fmod", is_double="1")
+
+// fbinop boilerplate
+// instr: operands held in fa0 and fa1, result written to fa0
+// instr may be a libm call, so:
+//  - avoid caller-save state across instr; s11 is used instead.
+//  - fa0 and fa1 are used instead of ft0 and ft1.
+//
+// The is_double flag ensures vregs are read and written in 64-bit widths.
+// Clobbers: t0, t1, fa0, fa1, s11
+%def generic_fbinop(instr="", is_double="0"):
+    FETCH t0, count=1     // t0 := CC|BB
+    srliw s11, xINST, 8   // s11 := AA, kept in s11 so it survives a libm call in instr
+    srliw t1, t0, 8       // t1 := CC
+    and t0, t0, 0xFF      // t0 := BB
+    GET_VREG_FLOAT fa1, t1, is_double=$is_double
+                          // fa1 := fp[CC]; t1 is overwritten with the slot address
+    GET_VREG_FLOAT fa0, t0, is_double=$is_double
+                          // fa0 := fp[BB]; t0 is overwritten with the slot address
+    FETCH_ADVANCE_INST 2  // advance xPC, load xINST
+    $instr                // read fa0 and fa1, write result to fa0.
+                          // instr may be a function call.
+    SET_VREG_FLOAT fa0, s11, is_double=$is_double
+                          // fp[AA] := fa0; clobbers t0 and s11
+    GET_INST_OPCODE t0    // t0 holds next opcode
+    GOTO_OPCODE t0        // continue to next
+
+//
+// fbinop/2addr vA, vB
+// Format 12x: B|A|op
+//
+
+// add-float/2addr vA, vB
+// Format 12x: B|A|c6
+%def op_add_float_2addr():
+%   generic_fbinop_2addr(instr="fadd.s fa0, fa0, fa1, rne")
+
+// sub-float/2addr vA, vB
+// Format 12x: B|A|c7
 %def op_sub_float_2addr():
-    unimp
+%   generic_fbinop_2addr(instr="fsub.s fa0, fa0, fa1, rne")
+
+// mul-float/2addr vA, vB
+// Format 12x: B|A|c8
+%def op_mul_float_2addr():
+%   generic_fbinop_2addr(instr="fmul.s fa0, fa0, fa1, rne")
+
+// div-float/2addr vA, vB
+// Format 12x: B|A|c9
+%def op_div_float_2addr():
+%   generic_fbinop_2addr(instr="fdiv.s fa0, fa0, fa1, rne")
+
+// rem-float/2addr vA, vB
+// Format 12x: B|A|ca
+// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
+// and is defined as result == a - roundTowardZero(a / b) * b.
+// Note: RISC-V does not offer floating point remainder; use fmodf in libm.
+%def op_rem_float_2addr():
+%   generic_fbinop_2addr(instr="call fmodf")
+
+// add-double/2addr vA, vB
+// Format 12x: B|A|cb
+%def op_add_double_2addr():
+%   generic_fbinop_2addr(instr="fadd.d fa0, fa0, fa1, rne", is_double="1")
+
+// sub-double/2addr vA, vB
+// Format 12x: B|A|cc
+%def op_sub_double_2addr():
+%   generic_fbinop_2addr(instr="fsub.d fa0, fa0, fa1, rne", is_double="1")
+
+// mul-double/2addr vA, vB
+// Format 12x: B|A|cd
+%def op_mul_double_2addr():
+%   generic_fbinop_2addr(instr="fmul.d fa0, fa0, fa1, rne", is_double="1")
+
+// div-double/2addr vA, vB
+// Format 12x: B|A|ce
+%def op_div_double_2addr():
+%   generic_fbinop_2addr(instr="fdiv.d fa0, fa0, fa1, rne", is_double="1")
+
+// rem-double/2addr vA, vB
+// Format 12x: B|A|cf
+// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
+// and is defined as result == a - roundTowardZero(a / b) * b.
+// Note: RISC-V does not offer floating point remainder; use fmod in libm.
+%def op_rem_double_2addr():
+%   generic_fbinop_2addr(instr="call fmod", is_double="1")
+
+// fbinop/2addr boilerplate
+// instr: operands held in fa0 and fa1, result written to fa0
+// instr may be a libm call, so:
+//  - avoid caller-save state across instr; s11 is used instead.
+//  - use fa0 and fa1 instead of ft0 and ft1.
+//
+// The is_double flag ensures vregs are read and written in 64-bit widths.
+// Clobbers: t0, t1, fa0, fa1, s11
+%def generic_fbinop_2addr(instr="", is_double="0"):
+    srliw t0, xINST, 8       // t0 := B|A
+    srliw t1, xINST, 12      // t1 := B
+    and t0, t0, 0xF          // t0 := A
+    GET_VREG_FLOAT fa1, t1, is_double=$is_double
+                             // fa1 := fp[B]
+    mv s11, t0               // s11 := A, saved because the read below overwrites t0
+    GET_VREG_FLOAT fa0, t0, is_double=$is_double
+                             // fa0 := fp[A]
+    FETCH_ADVANCE_INST 1     // advance xPC, load xINST
+    $instr                   // read fa0 and fa1, write result to fa0.
+                             // instr may be a function call.
+    GET_INST_OPCODE t1       // t1 holds next opcode (SET_VREG_FLOAT clobbers t0)
+    SET_VREG_FLOAT fa0, s11, is_double=$is_double  // fp[A] := fa0 (64-bit store if is_double)
+    GOTO_OPCODE t1           // continue to next
diff --git a/runtime/interpreter/mterp/riscv64/main.S b/runtime/interpreter/mterp/riscv64/main.S
index 8fbb4ba..4acca57 100644
--- a/runtime/interpreter/mterp/riscv64/main.S
+++ b/runtime/interpreter/mterp/riscv64/main.S
@@ -426,6 +426,50 @@
     sw \reg, (t0)          // refs[vreg] := reg
 .endm
 
+// Floating-point read of fp[\vreg] into \reg; 32-bit unless is_double is non-zero.
+// Clobbers: \reg, \vreg (\vreg is overwritten with the slot address)
+.macro GET_VREG_FLOAT reg, vreg, is_double=0
+    .if \is_double
+      GET_VREG_DOUBLE \reg, \vreg
+    .else
+      slliw \vreg, \vreg, 2  // vreg id to byte offset
+      add \vreg, xFP, \vreg  // vreg addr in register array
+      flw \reg, (\vreg)      // reg := fp[vreg]
+    .endif
+.endm
+
+// Floating-point write of \reg to fp[\vreg]; 32-bit unless is_double is non-zero.
+// Clobbers: t0, \vreg (\reg is only read)
+.macro SET_VREG_FLOAT reg, vreg, is_double=0
+    .if \is_double
+      SET_VREG_DOUBLE \reg, \vreg
+    .else
+      slliw \vreg, \vreg, 2  // vreg id to byte offset
+      add t0, xFP, \vreg     // vreg addr in register array
+      fsw \reg, (t0)         // fp[vreg] := reg
+      add t0, xREFS, \vreg   // vreg addr in reference array
+      sw zero, (t0)          // refs[vreg] := null
+    .endif
+.endm
+
+// Floating-point 64 bit read of the vreg pair starting at \vreg into \reg.
+// Clobbers: \reg, \vreg (\vreg is overwritten with the slot address)
+.macro GET_VREG_DOUBLE reg, vreg
+    slliw \vreg, \vreg, 2  // vreg id to byte offset
+    add \vreg, xFP, \vreg  // vreg addr in register array
+    fld \reg, (\vreg)       // reg := fp[vreg](lo) | fp[vreg+1](hi)
+.endm
+
+// Floating-point 64 bit write of \reg to the vreg pair starting at \vreg.
+// Clobbers: t0, \vreg (\vreg is overwritten with the byte offset; \reg is only read)
+.macro SET_VREG_DOUBLE reg, vreg
+    slliw \vreg, \vreg, 2  // vreg id to byte offset
+    add t0, xFP, \vreg     // vreg addr in register array
+    fsd \reg, (t0)         // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
+    add t0, xREFS, \vreg   // vreg addr in reference array
+    sd zero, (t0)          // one 64-bit store: refs[vreg] := null ; refs[vreg+1] := null
+.endm
+
 %def entry():
 /*
  * ArtMethod entry point.
diff --git a/runtime/nterp_helpers.cc b/runtime/nterp_helpers.cc
index 5e47862..77aeaf2 100644
--- a/runtime/nterp_helpers.cc
+++ b/runtime/nterp_helpers.cc
@@ -281,8 +281,20 @@
         case Instruction::NOT_INT:
         case Instruction::NEG_LONG:
         case Instruction::NOT_LONG:
+        case Instruction::NEG_FLOAT:
+        case Instruction::NEG_DOUBLE:
         case Instruction::INT_TO_LONG:
+        case Instruction::INT_TO_FLOAT:
+        case Instruction::INT_TO_DOUBLE:
         case Instruction::LONG_TO_INT:
+        case Instruction::LONG_TO_FLOAT:
+        case Instruction::LONG_TO_DOUBLE:
+        case Instruction::FLOAT_TO_INT:
+        case Instruction::FLOAT_TO_LONG:
+        case Instruction::FLOAT_TO_DOUBLE:
+        case Instruction::DOUBLE_TO_INT:
+        case Instruction::DOUBLE_TO_LONG:
+        case Instruction::DOUBLE_TO_FLOAT:
         case Instruction::INT_TO_BYTE:
         case Instruction::INT_TO_CHAR:
         case Instruction::INT_TO_SHORT:
@@ -308,6 +320,16 @@
         case Instruction::SHL_LONG:
         case Instruction::SHR_LONG:
         case Instruction::USHR_LONG:
+        case Instruction::ADD_FLOAT:
+        case Instruction::SUB_FLOAT:
+        case Instruction::MUL_FLOAT:
+        case Instruction::DIV_FLOAT:
+        case Instruction::REM_FLOAT:
+        case Instruction::ADD_DOUBLE:
+        case Instruction::SUB_DOUBLE:
+        case Instruction::MUL_DOUBLE:
+        case Instruction::DIV_DOUBLE:
+        case Instruction::REM_DOUBLE:
         case Instruction::ADD_INT_2ADDR:
         case Instruction::SUB_INT_2ADDR:
         case Instruction::MUL_INT_2ADDR:
@@ -330,6 +352,16 @@
         case Instruction::SHL_LONG_2ADDR:
         case Instruction::SHR_LONG_2ADDR:
         case Instruction::USHR_LONG_2ADDR:
+        case Instruction::ADD_FLOAT_2ADDR:
+        case Instruction::SUB_FLOAT_2ADDR:
+        case Instruction::MUL_FLOAT_2ADDR:
+        case Instruction::DIV_FLOAT_2ADDR:
+        case Instruction::REM_FLOAT_2ADDR:
+        case Instruction::ADD_DOUBLE_2ADDR:
+        case Instruction::SUB_DOUBLE_2ADDR:
+        case Instruction::MUL_DOUBLE_2ADDR:
+        case Instruction::DIV_DOUBLE_2ADDR:
+        case Instruction::REM_DOUBLE_2ADDR:
         case Instruction::ADD_INT_LIT16:
         case Instruction::RSUB_INT:
         case Instruction::MUL_INT_LIT16: