riscv64: hoist char immediates in slow path
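Load the shorty type chars 'D', 'F' and 'J' into scratch registers once per
slow-path argument setup, instead of re-materializing them with `li` inside
every load_vreg_in_gpr / load_vreg_in_fpr expansion. Callers of
slow_setup_args and slow_setup_args_string_init now pass two additional
scratch registers (z4, z5) to hold the hoisted constants.
See invoke.S for notes on code structure.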
Test: Run these opcodes against all interpreter
tests on a Linux RISC-V VM.
(1) setup
lunch aosp_riscv64-trunk-userdebug
export ART_TEST_SSH_USER=ubuntu
export ART_TEST_SSH_HOST=localhost
export ART_TEST_SSH_PORT=10001
export ART_TEST_ON_VM=true
. art/tools/buildbot-utils.sh
art/tools/buildbot-build.sh --target
# Create, boot and configure the VM.
art/tools/buildbot-vm.sh create
art/tools/buildbot-vm.sh boot
art/tools/buildbot-vm.sh setup-ssh # password: 'ubuntu'
art/tools/buildbot-cleanup-device.sh
art/tools/buildbot-setup-device.sh
art/tools/buildbot-sync.sh
(2) test
art/test.py --target -r --no-prebuild --ndebug --64 -j 12 --cdex-none --interpreter
Also verified clean with `m check_cfi`.
Bug: 283082047
Change-Id: Ic5244aa914511af2f99915085dd4a00b31c65cb5
diff --git a/runtime/interpreter/mterp/riscv64/invoke.S b/runtime/interpreter/mterp/riscv64/invoke.S
index 62cee02..e93cc9f 100644
--- a/runtime/interpreter/mterp/riscv64/invoke.S
+++ b/runtime/interpreter/mterp/riscv64/invoke.S
@@ -414,7 +414,7 @@
.L${uniq}_slow:
% get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-% slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq)
+% slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", uniq=uniq)
jalr s8 // args in a0-a5, fa0-fa4
% maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
// a0 := fa0 if float return
@@ -437,7 +437,7 @@
.L${uniq}_slow:
% get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-% slow_setup_args_string_init(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq)
+% slow_setup_args_string_init(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", uniq=uniq)
mv s9, a1 // save "this" in callee-save for return-time fixup
jalr s8 // args in a0-a5, fa0-fa4
@@ -483,7 +483,7 @@
.L${uniq}_slow:
% get_shorty_save_a0(shorty="s9", y0="s10")
-% slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", arg_start="0", uniq=uniq)
+% slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", arg_start="0", uniq=uniq)
jalr s8 // args in a0-a5, fa0-fa4
% maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
// a0 := fa0 if float return
@@ -533,7 +533,7 @@
.L${uniq}_slow:
% get_shorty_for_interface_save_a0_a1(shorty="s9", y0="s10", y1="s11")
-% slow_setup_args(shorty="s9", vregs="s7", z0="t1", z1="t2", z2="t3", z3="t4", uniq=uniq)
+% slow_setup_args(shorty="s9", vregs="s7", z0="t1", z1="t2", z2="t3", z3="t4", z4="t5", z5="t6", uniq=uniq)
jalr s8 // args a0-a5, fa0-fa4, and t0
% maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
// a0 := fa0 if float return
@@ -791,8 +791,8 @@
// - a0: ArtMethod*
// - a1: this
// Input
-// - vregs: F|E|D|C
-%def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", arg_start="1", uniq=""):
+// - vregs: F|E|D|C from dex
+%def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", arg_start="1", uniq=""):
srliw $z0, xINST, 12 // z0 := A
li $z1, 5
blt $z0, $z1, .L${uniq}_slow_gpr
@@ -806,31 +806,34 @@
addi $z0, $shorty, 1 // z0 := first arg of shorty
srliw $z1, $vregs, 4*$arg_start
// z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
+ li $z2, 'D' // double
+ li $z3, 'F' // float
+ li $z4, 'J' // long
// linear scan through shorty: extract non-float vregs
% if arg_start == "0": # static can place vC into a1; instance already loaded "this" into a1.
-% load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
-% load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
-% load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
-% load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
-% load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")
+% load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
+% load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
+% load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
+% load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
+% load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")
.L${uniq}_slow_fpr:
addi $z0, $shorty, 1 // z0 := first arg of shorty
srliw $z1, $vregs, 4*$arg_start
// z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
// linear scan through shorty: extract float/double vregs
-% load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
-% load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
-% load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
-% load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
+% load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
+% load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
+% load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
+% load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
% if arg_start == "0": # static can place G into fa4; instance has only 4 args.
-% load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4")
+% load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4")
%#:
.L${uniq}_slow_done:
// string-init variant
-%def slow_setup_args_string_init(shorty="", vregs="", z0="", z1="", z2="", z3="", uniq=""):
+%def slow_setup_args_string_init(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", uniq=""):
srliw $z0, xINST, 12 // z0 := A
li $z1, 5
blt $z0, $z1, .L${uniq}_slow_gpr
@@ -843,39 +846,39 @@
.L${uniq}_slow_gpr:
addi $z0, $shorty, 1 // z0 := first arg of shorty
srliw $z1, $vregs, 4 // z1 := (instance) F|E|D or G|F|E|D
+ li $z2, 'D' // double
+ li $z3, 'F' // float
+ li $z4, 'J' // long
// linear scan through shorty: extract non-float vregs
-% load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
-% load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
-% load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
-% load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
+% load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
+% load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
+% load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
+% load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
// TODO: java.lang.StringFactory methods don't have floating point args; skip FPR loads.
.L${uniq}_slow_fpr:
addi $z0, $shorty, 1 // z0 := first arg of shorty
srliw $z1, $vregs, 4 // z1 := (instance) F|E|D or G|F|E|D
// linear scan through shorty: extract float/double vregs
-% load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
-% load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
-% load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
-% load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, z0=z2, z1=z3, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
+% load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
+% load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
+% load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
+% load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
.L${uniq}_slow_done:
// Iterate through 4-bit vreg ids in the "vregs" register, load a non-FP value
// into one argument register.
-%def load_vreg_in_gpr(gpr="", shorty="", vregs="", z0="", z1="", done="", uniq=""):
+%def load_vreg_in_gpr(gpr="", shorty="", vregs="", z0="", D="", F="", J="", done="", uniq=""):
.L${uniq}_gpr_find:
lb $z0, ($shorty) // z0 := next shorty arg spec
addi $shorty, $shorty, 1 // increment char ptr
beqz $z0, $done // z0 == \0
- li $z1, 'F' // float
- beq $z0, $z1, .L${uniq}_gpr_skip_4_bytes
- li $z1, 'D' // double
- beq $z0, $z1, .L${uniq}_gpr_skip_8_bytes
+ beq $z0, $F, .L${uniq}_gpr_skip_4_bytes
+ beq $z0, $D, .L${uniq}_gpr_skip_8_bytes
- li $z1, 'J' // long
andi $gpr, $vregs, 0xF // gpr := vreg id
- beq $z0, $z1, .L${uniq}_gpr_load_8_bytes
+ beq $z0, $J, .L${uniq}_gpr_load_8_bytes
GET_VREG $gpr, $gpr // gpr := 32-bit load
srliw $vregs, $vregs, 4 // shift out the processed arg, one vreg
j .L${uniq}_gpr_set // and exit
@@ -894,30 +897,27 @@
// Iterate through 4-bit vreg ids in the "vregs" register, load a float or double
// value into one floating point argument register.
-%def load_vreg_in_fpr(fpr="", shorty="", vregs="", z0="", z1="", done="", uniq=""):
+%def load_vreg_in_fpr(fpr="", shorty="", vregs="", z0="", D="", F="", J="", done="", uniq=""):
.L${uniq}_fpr_find:
lb $z0, ($shorty) // z0 := next shorty arg spec
addi $shorty, $shorty, 1 // increment char ptr
beqz $z0, $done // z0 == \0
- li $z1, 'F' // float
- beq $z0, $z1, .L${uniq}_fpr_load_4_bytes
- li $z1, 'D' // double
- beq $z0, $z1, .L${uniq}_fpr_load_8_bytes
+ beq $z0, $F, .L${uniq}_fpr_load_4_bytes
+ beq $z0, $D, .L${uniq}_fpr_load_8_bytes
- li $z1, 'J' // long
srliw $vregs, $vregs, 4 // shift out a skipped arg, one vreg
- bne $z0, $z1, .L${uniq}_fpr_find
+ bne $z0, $J, .L${uniq}_fpr_find
srliw $vregs, $vregs, 4 // shift out one more skipped arg, for J
j .L${uniq}_fpr_find
.L${uniq}_fpr_load_4_bytes:
- andi $z1, $vregs, 0xF
- GET_VREG_FLOAT $fpr, $z1
+ andi $z0, $vregs, 0xF
+ GET_VREG_FLOAT $fpr, $z0
srliw $vregs, $vregs, 4 // shift out the processed arg, one vreg
j .L${uniq}_fpr_set
.L${uniq}_fpr_load_8_bytes:
- andi $z1, $vregs, 0xF
- GET_VREG_DOUBLE $fpr, $z1
+ andi $z0, $vregs, 0xF
+ GET_VREG_DOUBLE $fpr, $z0
srliw $vregs, $vregs, 8 // shift out the processed arg, a vreg pair
.L${uniq}_fpr_set: