arm64 nterp: Use fewer instructions for wide ops.
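
Scale the vreg index by sizeof(vreg) while it is extracted from the
instruction word (LOAD_SCALED_VREG_MASK / EXTRACT_SCALED_VREG), so that
the wide get/set helpers can address the frame with a single
register-offset load/store instead of computing the address with a
separate `add`. A rough before/after for one wide read, taken from the
main.S hunk below:

    // before: address computed with an extra add
    add ip2, xFP, \vreg, uxtw #2
    ldr \reg, [ip2]

    // after: the index arrives pre-scaled, so one ldr suffices
    ldr \reg, [xFP, \scaled_vreg, uxtw]
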
Test: testrunner.py --target --64 --jit
Change-Id: I84b042575c7521db72820375861c4317888fe508
diff --git a/runtime/interpreter/mterp/arm64ng/arithmetic.S b/runtime/interpreter/mterp/arm64ng/arithmetic.S
index cf9dd86..f751f99 100644
--- a/runtime/interpreter/mterp/arm64ng/arithmetic.S
+++ b/runtime/interpreter/mterp/arm64ng/arithmetic.S
@@ -141,21 +141,22 @@
* xor-long, add-double, sub-double, mul-double, div-double, rem-double
*/
/* binop vAA, vBB, vCC */
- FETCH w0, 1 // w0<- CCBB
- lsr w4, wINST, #8 // w4<- AA
- lsr w2, w0, #8 // w2<- CC
- and w1, w0, #255 // w1<- BB
- GET_VREG_WIDE $r2, w2 // w2<- vCC
- GET_VREG_WIDE $r1, w1 // w1<- vBB
+ FETCH w0, 1 // w0<- CCBB
+ LOAD_SCALED_VREG_MASK w5, 0xff // w5<- ff * sizeof(vreg)
+ EXTRACT_SCALED_VREG w4, w5, wINST, 8 // w4<- AA * sizeof(vreg)
+ EXTRACT_SCALED_VREG w2, w5, w0, 8 // w2<- CC * sizeof(vreg)
+ EXTRACT_SCALED_VREG w1, w5, w0, 0 // w1<- BB * sizeof(vreg)
+ GET_VREG_WIDE_PRESCALED $r2, w2 // w2<- vCC
+ GET_VREG_WIDE_PRESCALED $r1, w1 // w1<- vBB
.if $chkzero
- cbz $r2, common_errDivideByZero // is second operand zero?
+ cbz $r2, common_errDivideByZero // is second operand zero?
.endif
- FETCH_ADVANCE_INST 2 // advance rPC, load rINST
+ FETCH_ADVANCE_INST 2 // advance rPC, load rINST
$preinstr
- $instr // $result<- op, w0-w4 changed
- GET_INST_OPCODE ip // extract opcode from rINST
- SET_VREG_WIDE $result, w4 // vAA<- $result
- GOTO_OPCODE ip // jump to next instruction
+ $instr // $result<- op, w0-w4 changed
+ GET_INST_OPCODE ip // extract opcode from rINST
+ SET_VREG_WIDE_PRESCALED $result, w4 // vAA<- $result
+ GOTO_OPCODE ip // jump to next instruction
/* 11-14 instructions */
%def binopWide2addr(preinstr="", instr="add x0, x0, x1", r0="x0", r1="x1", chkzero="0"):
@@ -300,11 +301,12 @@
%def op_cmp_long():
FETCH w0, 1 // w0<- CCBB
+    LOAD_SCALED_VREG_MASK w5, 0xff      // w5<- ff * sizeof(vreg)
lsr w4, wINST, #8 // w4<- AA
- and w2, w0, #255 // w2<- BB
- lsr w3, w0, #8 // w3<- CC
- GET_VREG_WIDE x1, w2
- GET_VREG_WIDE x2, w3
+ EXTRACT_SCALED_VREG w2, w5, w0, 0 // w2<- BB * sizeof(vreg)
+ EXTRACT_SCALED_VREG w3, w5, w0, 8 // w3<- CC * sizeof(vreg)
+ GET_VREG_WIDE_PRESCALED x1, w2
+ GET_VREG_WIDE_PRESCALED x2, w3
cmp x1, x2
cset w0, ne
cneg w0, w0, lt
diff --git a/runtime/interpreter/mterp/arm64ng/floating_point.S b/runtime/interpreter/mterp/arm64ng/floating_point.S
index ad42db3..a91fcf7 100644
--- a/runtime/interpreter/mterp/arm64ng/floating_point.S
+++ b/runtime/interpreter/mterp/arm64ng/floating_point.S
@@ -69,20 +69,24 @@
SET_VREG_DOUBLE $r0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
-%def fcmp(wide="", r1="s1", r2="s2", cond="lt"):
+%def fcmp(r1="s1", r2="s2", cond="lt"):
/*
* Compare two floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
*/
/* op vAA, vBB, vCC */
FETCH w0, 1 // w0<- CCBB
+% if r1.startswith("d"):
+    LOAD_SCALED_VREG_MASK w5, 0xff      // w5<- ff * sizeof(vreg)
+ lsr w4, wINST, #8 // w4<- AA
+ EXTRACT_SCALED_VREG w2, w5, w0, 0 // w2<- BB * sizeof(vreg)
+ EXTRACT_SCALED_VREG w3, w5, w0, 8 // w3<- CC * sizeof(vreg)
+ GET_VREG_DOUBLE_PRESCALED $r1, w2
+ GET_VREG_DOUBLE_PRESCALED $r2, w3
+% else:
lsr w4, wINST, #8 // w4<- AA
and w2, w0, #255 // w2<- BB
lsr w3, w0, #8 // w3<- CC
-% if r1.startswith("d"):
- GET_VREG_DOUBLE $r1, w2
- GET_VREG_DOUBLE $r2, w3
-% else:
GET_VREG $r1, w2
GET_VREG $r2, w3
% #endif
@@ -188,16 +192,16 @@
% fbinop2addr(instr="fadd s2, s0, s1")
%def op_cmpg_double():
-% fcmp(wide="_WIDE", r1="d1", r2="d2", cond="cc")
+% fcmp(r1="d1", r2="d2", cond="cc")
%def op_cmpg_float():
-% fcmp(wide="", r1="s1", r2="s2", cond="cc")
+% fcmp(r1="s1", r2="s2", cond="cc")
%def op_cmpl_double():
-% fcmp(wide="_WIDE", r1="d1", r2="d2", cond="lt")
+% fcmp(r1="d1", r2="d2", cond="lt")
%def op_cmpl_float():
-% fcmp(wide="", r1="s1", r2="s2", cond="lt")
+% fcmp(r1="s1", r2="s2", cond="lt")
%def op_div_double():
% fbinopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 0ed237c..ae08f3c 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -203,25 +203,39 @@
/*
* Get/set the 64-bit value from a Dalvik register.
*/
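+// Materialize \unscaled_mask pre-multiplied by sizeof(vreg); vreg slots are 4 bytes.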
+.macro LOAD_SCALED_VREG_MASK scaled_mask_reg, unscaled_mask
+ mov \scaled_mask_reg, \unscaled_mask << 2
+.endm
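+// Extract the vreg field at bit \lsb of \src_reg, already scaled by sizeof(vreg),
+// using a single and-with-shift against the pre-scaled mask.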
+.macro EXTRACT_SCALED_VREG scaled_vreg, scaled_mask_reg, src_reg, lsb
+ .if \lsb < 2
+ and \scaled_vreg, \scaled_mask_reg, \src_reg, lsl #(2 - \lsb)
+ .else
+ and \scaled_vreg, \scaled_mask_reg, \src_reg, lsr #(\lsb - 2)
+ .endif
+.endm
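+// Load a 64-bit vreg; \scaled_vreg is the vreg number already multiplied by sizeof(vreg).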
+.macro GET_VREG_WIDE_PRESCALED reg, scaled_vreg
+ ldr \reg, [xFP, \scaled_vreg, uxtw]
+.endm
.macro GET_VREG_WIDE reg, vreg
- add ip2, xFP, \vreg, uxtw #2
- ldr \reg, [ip2]
+ lsl wip2, \vreg, #2
+ GET_VREG_WIDE_PRESCALED \reg, wip2
+.endm
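+// Store a 64-bit vreg and clear the corresponding reference slots; \scaled_vreg is pre-scaled.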
+.macro SET_VREG_WIDE_PRESCALED reg, scaled_vreg
+ str \reg, [xFP, \scaled_vreg, uxtw]
+ str xzr, [xREFS, \scaled_vreg, uxtw]
.endm
.macro SET_VREG_WIDE reg, vreg
- add ip2, xFP, \vreg, uxtw #2
- str \reg, [ip2]
- add ip2, xREFS, \vreg, uxtw #2
- str xzr, [ip2]
+ lsl wip2, \vreg, #2
+ SET_VREG_WIDE_PRESCALED \reg, wip2
+.endm
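+// Doubles use the same frame slot layout as wide integer vregs.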
+.macro GET_VREG_DOUBLE_PRESCALED reg, scaled_vreg
+ GET_VREG_WIDE_PRESCALED \reg, \scaled_vreg
.endm
.macro GET_VREG_DOUBLE reg, vreg
- add ip2, xFP, \vreg, uxtw #2
- ldr \reg, [ip2]
+ GET_VREG_WIDE \reg, \vreg
.endm
.macro SET_VREG_DOUBLE reg, vreg
- add ip2, xFP, \vreg, uxtw #2
- str \reg, [ip2]
- add ip2, xREFS, \vreg, uxtw #2
- str xzr, [ip2]
+ SET_VREG_WIDE \reg, \vreg
.endm
/*