riscv64: re-enable an invoke fast path

On RISC-V, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT has a narrower
meaning than on other ISAs: it is set only when every parameter is an
object reference (other ISAs also accept ints).

See invoke.S for notes on code structure.

Test: Run these opcodes against all interpreter
tests on a Linux RISC-V VM.

(1) setup
  lunch aosp_riscv64-trunk-userdebug

  export ART_TEST_SSH_USER=ubuntu
  export ART_TEST_SSH_HOST=localhost
  export ART_TEST_SSH_PORT=10001
  export ART_TEST_ON_VM=true

  . art/tools/buildbot-utils.sh
  art/tools/buildbot-build.sh --target

  # Create, boot and configure the VM.
  art/tools/buildbot-vm.sh create
  art/tools/buildbot-vm.sh boot
  art/tools/buildbot-vm.sh setup-ssh  # password: 'ubuntu'

  art/tools/buildbot-cleanup-device.sh
  art/tools/buildbot-setup-device.sh
  art/tools/buildbot-sync.sh

(2) test
  art/test.py --target -r --no-prebuild --ndebug --64  -j 12 --cdex-none --interpreter

Clean with `m check_cfi`.

Bug: 283082047
Change-Id: Ie1ecee895cb6d8abad40970041e95388feb530e2
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d08fe23..3b0dda2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4005,13 +4005,12 @@
       }
     }
   }
-  if (all_parameters_are_reference_or_int && shorty[0] != 'F' && shorty[0] != 'D') {
-    // FIXME(riscv64): This optimization is currently disabled because riscv64 needs
-    // to distinguish between zero-extended references and sign-extended integers.
-    // We should enable this for references only and fix corresponding nterp fast-paths.
-    if (kRuntimeISA != InstructionSet::kRiscv64) {
-      access_flags |= kAccNterpInvokeFastPathFlag;
-    }
+  if (kRuntimeISA != InstructionSet::kRiscv64 && all_parameters_are_reference_or_int &&
+      shorty[0] != 'F' && shorty[0] != 'D') {
+    access_flags |= kAccNterpInvokeFastPathFlag;
+  } else if (kRuntimeISA == InstructionSet::kRiscv64 && all_parameters_are_reference &&
+             shorty[0] != 'F' && shorty[0] != 'D') {
+    access_flags |= kAccNterpInvokeFastPathFlag;
   }
 
   if (UNLIKELY((access_flags & kAccNative) != 0u)) {
diff --git a/runtime/interpreter/mterp/riscv64/invoke.S b/runtime/interpreter/mterp/riscv64/invoke.S
index 569b750..0bce65c 100644
--- a/runtime/interpreter/mterp/riscv64/invoke.S
+++ b/runtime/interpreter/mterp/riscv64/invoke.S
@@ -15,9 +15,8 @@
 // (0) If the next method's "quick code" is nterp, then set up a fresh nterp frame and perform a
 //     vreg->vreg transfer. Jump to handler for the next method's first opcode.
 // - The following paths leave nterp. -
-// (1) If the next method is guaranteed to avoid floats, doubles, and longs, then the managed ABI is
-//     very simple: just place all arguments in the native arg registers. We don't need to know the
-//     precise types or widths, just the order matters. Call the quick code.
+// (1) If the next method takes only object refs as arguments, then the managed ABI is very simple:
+//     just place all arguments in the native arg registers using LWU. Call the quick code.
 // (2) If the next method has 0 or 1 argument, then the managed ABI is mildly overloaded by
 //     pessimistically placing a singleton 32-bit arg in both a0 and fa0; we don't have to know if
 //     the argument is an int or float. We might be able to avoid the shorty ...
@@ -664,6 +663,7 @@
 // Temporaries: z0, z1
 %def try_simple_args(v_fedc="", z0="", z1="", arg_start="1", skip="", uniq=""):
    lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
+   // RISC-V only: nterp-invoke-fast-path-flag is set when all args are object refs (no ints).
    BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip
 
    srliw $z0, xINST, 12              // z0 := A
@@ -679,22 +679,22 @@
    // A = 5
    srliw $z1, xINST, 8               // z1 := A|G
    andi $z1, $z1, 0xF                // z1 := G
-   GET_VREG a5, $z1
+   GET_VREG_OBJECT a5, $z1
 .L${uniq}_simple_4:
    srliw $z1, $v_fedc, 12            // z1 := F
-   GET_VREG a4, $z1
+   GET_VREG_OBJECT a4, $z1
 .L${uniq}_simple_3:
    srliw $z1, $v_fedc, 8             // z1 := F|E
    andi $z1, $z1, 0xF                // z1 := E
-   GET_VREG a3, $z1
+   GET_VREG_OBJECT a3, $z1
 .L${uniq}_simple_2:
    srliw $z1, $v_fedc, 4             // z1 := F|E|D
    andi $z1, $z1, 0xF                // z1 := D
-   GET_VREG a2, $z1
+   GET_VREG_OBJECT a2, $z1
 .L${uniq}_simple_1:
 %  if arg_start == "0":
      andi $z1, $v_fedc, 0xF          // z1 := C
-     GET_VREG a1, $z1
+     GET_VREG_OBJECT a1, $z1
    // instance: a1 already set to "this"
 .L${uniq}_simple_done:
 
@@ -702,6 +702,7 @@
 // Range variant.
 %def try_simple_args_range(vC="", z0="", z1="", z2="", z3="", z4="", skip="", arg_start="1", uniq=""):
    lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
+   // RISC-V only: nterp-invoke-fast-path-flag is set when all args are object refs (no ints).
    BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip
 
    srliw $z0, xINST, 8                 // z0 := AA
@@ -734,7 +735,7 @@
                                      // z4 := &OUT[CCCC + 7]
    bltz $z2, .L${uniq}_simple_loop_wide
                                      // if AA odd, branch to wide-copy
-   lw $z2, ($z3)
+   lwu $z2, ($z3)
    sw $z2, ($z4)
    addi $z3, $z3, 4
    addi $z4, $z4, 4
@@ -750,20 +751,20 @@
 
    // Bottom 7 slots of OUT array never written; first args are passed with a1-a7.
 .L${uniq}_simple_7:
-   lw a7, 6*4($z1)
+   lwu a7, 6*4($z1)
 .L${uniq}_simple_6:
-   lw a6, 5*4($z1)
+   lwu a6, 5*4($z1)
 .L${uniq}_simple_5:
-   lw a5, 4*4($z1)
+   lwu a5, 4*4($z1)
 .L${uniq}_simple_4:
-   lw a4, 3*4($z1)
+   lwu a4, 3*4($z1)
 .L${uniq}_simple_3:
-   lw a3, 2*4($z1)
+   lwu a3, 2*4($z1)
 .L${uniq}_simple_2:
-   lw a2, 1*4($z1)
+   lwu a2, 1*4($z1)
 .L${uniq}_simple_1:
 %  if arg_start == "0":  # static:
-     lw a1, 0*4($z1)
+     lwu a1, 0*4($z1)
 %#:
 .L${uniq}_simple_done: