riscv64: hoist char immediates in slow path

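Load the shorty type characters 'D', 'F' and 'J' into scratch registers
up front in slow_setup_args and slow_setup_args_string_init, and pass
them to load_vreg_in_gpr and load_vreg_in_fpr, instead of materializing
each one with `li` on every step of the shorty scan. The setup macros
take two more scratch registers (z4, z5) for this.

See invoke.S for notes on code structure.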

Test: Run these opcodes against all interpreter
tests on a Linux RISC-V VM.

(1) setup
  lunch aosp_riscv64-trunk-userdebug

  export ART_TEST_SSH_USER=ubuntu
  export ART_TEST_SSH_HOST=localhost
  export ART_TEST_SSH_PORT=10001
  export ART_TEST_ON_VM=true

  . art/tools/buildbot-utils.sh
  art/tools/buildbot-build.sh --target

  # Create, boot and configure the VM.
  art/tools/buildbot-vm.sh create
  art/tools/buildbot-vm.sh boot
  art/tools/buildbot-vm.sh setup-ssh  # password: 'ubuntu'

  art/tools/buildbot-cleanup-device.sh
  art/tools/buildbot-setup-device.sh
  art/tools/buildbot-sync.sh

(2) test
  art/test.py --target -r --no-prebuild --ndebug --64  -j 12 --cdex-none --interpreter

Also verified that `m check_cfi` runs clean.

Bug: 283082047
Change-Id: Ic5244aa914511af2f99915085dd4a00b31c65cb5
diff --git a/runtime/interpreter/mterp/riscv64/invoke.S b/runtime/interpreter/mterp/riscv64/invoke.S
index 62cee02..e93cc9f 100644
--- a/runtime/interpreter/mterp/riscv64/invoke.S
+++ b/runtime/interpreter/mterp/riscv64/invoke.S
@@ -414,7 +414,7 @@
 
 .L${uniq}_slow:
 %  get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-%  slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq)
+%  slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", uniq=uniq)
    jalr s8                       // args in a0-a5, fa0-fa4
 %  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                                  // a0 := fa0 if float return
@@ -437,7 +437,7 @@
 
 .L${uniq}_slow:
 %  get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-%  slow_setup_args_string_init(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq)
+%  slow_setup_args_string_init(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", uniq=uniq)
    mv s9, a1                     // save "this" in callee-save for return-time fixup
    jalr s8                       // args in a0-a5, fa0-fa4
 
@@ -483,7 +483,7 @@
 
 .L${uniq}_slow:
 %  get_shorty_save_a0(shorty="s9", y0="s10")
-%  slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", arg_start="0", uniq=uniq)
+%  slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", arg_start="0", uniq=uniq)
    jalr s8                       // args in a0-a5, fa0-fa4
 %  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                                  // a0 := fa0 if float return
@@ -533,7 +533,7 @@
 
 .L${uniq}_slow:
 %  get_shorty_for_interface_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-%  slow_setup_args(shorty="s9", vregs="s7", z0="t1", z1="t2", z2="t3", z3="t4", uniq=uniq)
+%  slow_setup_args(shorty="s9", vregs="s7", z0="t1", z1="t2", z2="t3", z3="t4", z4="t5", z5="t6", uniq=uniq)
    jalr s8                       // args a0-a5, fa0-fa4, and t0
 %  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                                  // a0 := fa0 if float return
@@ -791,8 +791,8 @@
 // - a0: ArtMethod*
 // - a1: this
 // Input
-// - vregs: F|E|D|C
-%def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", arg_start="1", uniq=""):
+// - vregs: F|E|D|C from dex
+%def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", arg_start="1", uniq=""):
    srliw $z0, xINST, 12     // z0 := A
    li $z1, 5
    blt $z0, $z1, .L${uniq}_slow_gpr
@@ -806,31 +806,34 @@
    addi $z0, $shorty, 1     // z0 := first arg of shorty
    srliw $z1, $vregs, 4*$arg_start
                             // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
+   li $z2, 'D'              // double
+   li $z3, 'F'              // float
+   li $z4, 'J'              // long
    // linear scan through shorty: extract non-float vregs
 %  if arg_start == "0":  # static can place vC into a1; instance already loaded "this" into a1.
-%    load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
-%  load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
-%  load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
-%  load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
-%  load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")
+%    load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
+%  load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
+%  load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
+%  load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
+%  load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")
 
 .L${uniq}_slow_fpr:
    addi $z0, $shorty, 1     // z0 := first arg of shorty
    srliw $z1, $vregs, 4*$arg_start
                             // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
    // linear scan through shorty: extract float/double vregs
-%  load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
-%  load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
-%  load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
-%  load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
+%  load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
+%  load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
+%  load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
+%  load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
 %  if arg_start == "0":  # static can place G into fa4; instance has only 4 args.
-%    load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4")
+%    load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4")
 %#:
 .L${uniq}_slow_done:
 
 
 // string-init variant
-%def slow_setup_args_string_init(shorty="", vregs="", z0="", z1="", z2="", z3="", uniq=""):
+%def slow_setup_args_string_init(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", uniq=""):
    srliw $z0, xINST, 12     // z0 := A
    li $z1, 5
    blt $z0, $z1, .L${uniq}_slow_gpr
@@ -843,39 +846,39 @@
 .L${uniq}_slow_gpr:
    addi $z0, $shorty, 1     // z0 := first arg of shorty
    srliw $z1, $vregs, 4     // z1 := (instance) F|E|D or G|F|E|D
+   li $z2, 'D'              // double
+   li $z3, 'F'              // float
+   li $z4, 'J'              // long
    // linear scan through shorty: extract non-float vregs
-%  load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
-%  load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
-%  load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
-%  load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
+%  load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
+%  load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
+%  load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
+%  load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
 
    // TODO: java.lang.StringFactory methods don't have floating point args; skip FPR loads.
 .L${uniq}_slow_fpr:
    addi $z0, $shorty, 1     // z0 := first arg of shorty
    srliw $z1, $vregs, 4     // z1 := (instance) F|E|D or G|F|E|D
    // linear scan through shorty: extract float/double vregs
-%  load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
-%  load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
-%  load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
-%  load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
+%  load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
+%  load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
+%  load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
+%  load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
 .L${uniq}_slow_done:
 
 
 // Iterate through 4-bit vreg ids in the "vregs" register, load a non-FP value
 // into one argument register.
-%def load_vreg_in_gpr(gpr="", shorty="", vregs="", z0="", z1="", done="", uniq=""):
+%def load_vreg_in_gpr(gpr="", shorty="", vregs="", z0="", D="", F="", J="", done="", uniq=""):
 .L${uniq}_gpr_find:
     lb $z0, ($shorty)         // z0 := next shorty arg spec
     addi $shorty, $shorty, 1  // increment char ptr
     beqz $z0, $done           // z0 == \0
-    li $z1, 'F'               // float
-    beq $z0, $z1, .L${uniq}_gpr_skip_4_bytes
-    li $z1, 'D'               // double
-    beq $z0, $z1, .L${uniq}_gpr_skip_8_bytes
+    beq $z0, $F, .L${uniq}_gpr_skip_4_bytes
+    beq $z0, $D, .L${uniq}_gpr_skip_8_bytes
 
-    li $z1, 'J'               // long
     andi $gpr, $vregs, 0xF    // gpr := vreg id
-    beq $z0, $z1, .L${uniq}_gpr_load_8_bytes
+    beq $z0, $J, .L${uniq}_gpr_load_8_bytes
     GET_VREG $gpr, $gpr       // gpr := 32-bit load
     srliw $vregs, $vregs, 4   // shift out the processed arg, one vreg
     j .L${uniq}_gpr_set       // and exit
@@ -894,30 +897,27 @@
 
 // Iterate through 4-bit vreg ids in the "vregs" register, load a float or double
 // value into one floating point argument register.
-%def load_vreg_in_fpr(fpr="", shorty="", vregs="", z0="", z1="", done="", uniq=""):
+%def load_vreg_in_fpr(fpr="", shorty="", vregs="", z0="", D="", F="", J="", done="", uniq=""):
 .L${uniq}_fpr_find:
     lb $z0, ($shorty)         // z0 := next shorty arg spec
     addi $shorty, $shorty, 1  // increment char ptr
     beqz $z0, $done           // z0 == \0
-    li $z1, 'F'               // float
-    beq $z0, $z1, .L${uniq}_fpr_load_4_bytes
-    li $z1, 'D'               // double
-    beq $z0, $z1, .L${uniq}_fpr_load_8_bytes
+    beq $z0, $F, .L${uniq}_fpr_load_4_bytes
+    beq $z0, $D, .L${uniq}_fpr_load_8_bytes
 
-    li $z1, 'J'               // long
     srliw $vregs, $vregs, 4   // shift out a skipped arg, one vreg
-    bne $z0, $z1, .L${uniq}_fpr_find
+    bne $z0, $J, .L${uniq}_fpr_find
     srliw $vregs, $vregs, 4   // shift out one more skipped arg, for J
     j .L${uniq}_fpr_find
 
 .L${uniq}_fpr_load_4_bytes:
-    andi $z1, $vregs, 0xF
-    GET_VREG_FLOAT $fpr, $z1
+    andi $z0, $vregs, 0xF
+    GET_VREG_FLOAT $fpr, $z0
     srliw $vregs, $vregs, 4   // shift out the processed arg, one vreg
     j .L${uniq}_fpr_set
 .L${uniq}_fpr_load_8_bytes:
-    andi $z1, $vregs, 0xF
-    GET_VREG_DOUBLE $fpr, $z1
+    andi $z0, $vregs, 0xF
+    GET_VREG_DOUBLE $fpr, $z0
     srliw $vregs, $vregs, 8   // shift out the processed arg, a vreg pair
 .L${uniq}_fpr_set: