MIPS32: Pass more arguments in registers.
Specifically, use A0-A3, T0-T1 for non-floats and F8-F19 for floats.
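
For illustration (a hypothetical signature, inferred from the new invoke
stubs below): a static method with shorty "VIJFD", i.e.
void f(int, long, float, double), now receives the ArtMethod* in A0,
the int in A1, the long in the aligned pair A2/A3, the float in F8 and
the double in F10/F11; a fifth int argument would land in T0. Every FP
argument, single or double, consumes one even/odd FPR pair slot
(F8, F10, ..., F18). For non-static methods A1 holds 'this' and integer
arguments start at A2.
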
Test: booted MIPS32R2 in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R2) on CI20
Test: test-art-target-gtest (MIPS32R2) on CI20
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R6) in QEMU
Test: test-art-target-gtest (MIPS32R6) in QEMU
Test: test-art-host-gtest
Change-Id: Ib8b0310a109d9f3d70119c1e605e54b013e60728
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 34e34b4..3e8cdc9 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -167,50 +167,60 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
- * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
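+ * Frame layout, offsets from $sp (matches the stores below):
+ * 108: $ra, 104: $s8, 100: $gp, 96-76: $s7-$s2,
+ * 72: $t1, 68: $t0, 64: $a3, 60: $a2, 56: $a1,
+ * 48-8: $f18/$f19 down to $f8/$f9 (double-word pairs),
+ * 4: padding, 0: ArtMethod*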
*/
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
- addiu $sp, $sp, -80
- .cfi_adjust_cfa_offset 80
+ addiu $sp, $sp, -112
+ .cfi_adjust_cfa_offset 112
// Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80)
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 112)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
#endif
- sw $ra, 76($sp)
- .cfi_rel_offset 31, 76
- sw $s8, 72($sp)
- .cfi_rel_offset 30, 72
- sw $gp, 68($sp)
- .cfi_rel_offset 28, 68
- sw $s7, 64($sp)
- .cfi_rel_offset 23, 64
- sw $s6, 60($sp)
- .cfi_rel_offset 22, 60
- sw $s5, 56($sp)
- .cfi_rel_offset 21, 56
- sw $s4, 52($sp)
- .cfi_rel_offset 20, 52
- sw $s3, 48($sp)
- .cfi_rel_offset 19, 48
- sw $s2, 44($sp)
- .cfi_rel_offset 18, 44
- sw $a3, 40($sp)
- .cfi_rel_offset 7, 40
- sw $a2, 36($sp)
- .cfi_rel_offset 6, 36
- sw $a1, 32($sp)
- .cfi_rel_offset 5, 32
- SDu $f14, $f15, 24, $sp, $t0
- SDu $f12, $f13, 16, $sp, $t0
+ sw $ra, 108($sp)
+ .cfi_rel_offset 31, 108
+ sw $s8, 104($sp)
+ .cfi_rel_offset 30, 104
+ sw $gp, 100($sp)
+ .cfi_rel_offset 28, 100
+ sw $s7, 96($sp)
+ .cfi_rel_offset 23, 96
+ sw $s6, 92($sp)
+ .cfi_rel_offset 22, 92
+ sw $s5, 88($sp)
+ .cfi_rel_offset 21, 88
+ sw $s4, 84($sp)
+ .cfi_rel_offset 20, 84
+ sw $s3, 80($sp)
+ .cfi_rel_offset 19, 80
+ sw $s2, 76($sp)
+ .cfi_rel_offset 18, 76
+ sw $t1, 72($sp)
+ .cfi_rel_offset 9, 72
+ sw $t0, 68($sp)
+ .cfi_rel_offset 8, 68
+ sw $a3, 64($sp)
+ .cfi_rel_offset 7, 64
+ sw $a2, 60($sp)
+ .cfi_rel_offset 6, 60
+ sw $a1, 56($sp)
+ .cfi_rel_offset 5, 56
+ SDu $f18, $f19, 48, $sp, $t8
+ SDu $f16, $f17, 40, $sp, $t8
+ SDu $f14, $f15, 32, $sp, $t8
+ SDu $f12, $f13, 24, $sp, $t8
+ SDu $f10, $f11, 16, $sp, $t8
+ SDu $f8, $f9, 8, $sp, $t8
# bottom will hold Method*
.endm
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
* Clobbers $t0 and $sp
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
* Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
@@ -229,7 +239,8 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
* Clobbers $sp
* Use $a0 as the Method* and loads it into bottom of stack.
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
@@ -246,34 +257,42 @@
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack
.cfi_adjust_cfa_offset -ARG_SLOT_SIZE
- lw $ra, 76($sp)
+ lw $ra, 108($sp)
.cfi_restore 31
- lw $s8, 72($sp)
+ lw $s8, 104($sp)
.cfi_restore 30
- lw $gp, 68($sp)
+ lw $gp, 100($sp)
.cfi_restore 28
- lw $s7, 64($sp)
+ lw $s7, 96($sp)
.cfi_restore 23
- lw $s6, 60($sp)
+ lw $s6, 92($sp)
.cfi_restore 22
- lw $s5, 56($sp)
+ lw $s5, 88($sp)
.cfi_restore 21
- lw $s4, 52($sp)
+ lw $s4, 84($sp)
.cfi_restore 20
- lw $s3, 48($sp)
+ lw $s3, 80($sp)
.cfi_restore 19
- lw $s2, 44($sp)
+ lw $s2, 76($sp)
.cfi_restore 18
- lw $a3, 40($sp)
+ lw $t1, 72($sp)
+ .cfi_restore 9
+ lw $t0, 68($sp)
+ .cfi_restore 8
+ lw $a3, 64($sp)
.cfi_restore 7
- lw $a2, 36($sp)
+ lw $a2, 60($sp)
.cfi_restore 6
- lw $a1, 32($sp)
+ lw $a1, 56($sp)
.cfi_restore 5
- LDu $f14, $f15, 24, $sp, $t1
- LDu $f12, $f13, 16, $sp, $t1
- addiu $sp, $sp, 80 # pop frame
- .cfi_adjust_cfa_offset -80
+ LDu $f18, $f19, 48, $sp, $t8
+ LDu $f16, $f17, 40, $sp, $t8
+ LDu $f14, $f15, 32, $sp, $t8
+ LDu $f12, $f13, 24, $sp, $t8
+ LDu $f10, $f11, 16, $sp, $t8
+ LDu $f8, $f9, 8, $sp, $t8
+ addiu $sp, $sp, 112 # pop frame
+ .cfi_adjust_cfa_offset -112
.endm
/*
@@ -824,30 +843,56 @@
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
-.macro LOAD_WORD_TO_REG reg, next_arg, index, label
+// Each of the following macros expands into at most four instructions
+// and is padded with .balign to exactly 16 bytes. The fixed size makes
+// them usable as entries of indexable "tables" of code.
+
+.macro LOAD_WORD_TO_REG reg, next_arg, index_reg, label
lw $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
b \label
- addiu $\index, 1
+ addiu $\index_reg, 16
+ .balign 16
.endm
-.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index, label
+.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index_reg, next_index, label
lw $\reg1, -8($\next_arg) # next_arg points to argument after the current one (offset is 8)
lw $\reg2, -4($\next_arg)
b \label
- li $\index, 4 # long can be loaded only to a2_a3 pair so index will be always 4
+ li $\index_reg, \next_index
+ .balign 16
.endm
-.macro LOAD_FLOAT_TO_REG reg, next_arg, index, label
+.macro LOAD_FLOAT_TO_REG reg, next_arg, index_reg, label
lwc1 $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
b \label
- addiu $\index, 1
+ addiu $\index_reg, 16
+ .balign 16
.endm
-.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index, tmp, label
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+// LDu expands into 3 instructions for 64-bit FPU, so there is no room to
+// update index_reg here; the caller updates it instead.
+.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
+ .set reorder # force use of the branch delay slot
LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one
# (offset is 8)
b \label
- addiu $\index, 1
+ .set noreorder
+ .balign 16
+.endm
+#else
+// LDu expands into 2 instructions for 32-bit FPU, so index_reg is updated here.
+.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
+ LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one
+ # (offset is 8)
+ b \label
+ addiu $\index_reg, 16
+ .balign 16
+.endm
+#endif
+
+.macro LOAD_END index_reg, next_index, label
+ b \label
+ li $\index_reg, \next_index
+ .balign 16
.endm
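+
+// With every entry padded to 16 bytes, dispatching into one of these
+// tables reduces to an add and an indirect jump (see the invoke stubs
+// below):
+// addu $ra, <table_base>, <index_reg>
+// jalr $zero, $ra
+// Each entry loads its register(s), advances index_reg to the next
+// 16-byte entry and branches back to the argument-parsing loop.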
#define SPILL_SIZE 32
@@ -891,61 +936,63 @@
lw $gp, 16($fp) # restore $gp
lw $a0, SPILL_SIZE($fp) # restore ArtMethod*
lw $a1, 4($sp) # a1 = this*
- addiu $t0, $sp, 8 # t0 = pointer to the current argument (skip ArtMethod* and this*)
- li $t3, 2 # t3 = gpr_index = 2 (skip A0 and A1)
- move $t4, $zero # t4 = fp_index = 0
- lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+ addiu $t8, $sp, 8 # t8 = pointer to the current argument (skip ArtMethod* and this*)
+ li $t6, 0 # t6 = gpr_index = 0 (corresponds to A2; A0 and A1 are skipped)
+ li $t7, 0 # t7 = fp_index = 0
+ lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
# as the $fp is SPILL_SIZE bytes below the $sp on entry)
- addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+ addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type)
+
+ // Load the base addresses of tabInt ... tabDouble.
+ // We will use the register indices (gpr_index, fp_index) to branch.
+ // Note that the indices are scaled by 16, so they can be added to the bases directly.
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+ lapc $t2, tabInt
+ lapc $t3, tabLong
+ lapc $t4, tabSingle
+ lapc $t5, tabDouble
+#else
+ bltzal $zero, tabBase # nal
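+ # (the branch is never taken, but bltzal still sets $ra to the
+ # address of tabBase, giving a PC-relative base on pre-R6 cores,
+ # which lack the lapc instruction)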
+ addiu $t2, $ra, %lo(tabInt - tabBase)
+tabBase:
+ addiu $t3, $ra, %lo(tabLong - tabBase)
+ addiu $t4, $ra, %lo(tabSingle - tabBase)
+ addiu $t5, $ra, %lo(tabDouble - tabBase)
+#endif
+
loop:
- lbu $t2, 0($t1) # t2 = shorty[i]
- beqz $t2, loopEnd # finish getting args when shorty[i] == '\0'
- addiu $t1, 1
+ lbu $ra, 0($t9) # ra = shorty[i]
+ beqz $ra, loopEnd # finish getting args when shorty[i] == '\0'
+ addiu $t9, 1
- li $t9, 'J' # put char 'J' into t9
- beq $t9, $t2, isLong # branch if result type char == 'J'
- li $t9, 'D' # put char 'D' into t9
- beq $t9, $t2, isDouble # branch if result type char == 'D'
- li $t9, 'F' # put char 'F' into t9
- beq $t9, $t2, isSingle # branch if result type char == 'F'
- addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
- # for both, int and single)
+ addiu $ra, -'J'
+ beqz $ra, isLong # branch if arg type char == 'J'
+ addiu $ra, 'J' - 'D'
+ beqz $ra, isDouble # branch if arg type char == 'D'
+ addiu $ra, 'D' - 'F'
+ beqz $ra, isSingle # branch if arg type char == 'F'
- li $t5, 2 # skip a0 and a1 (ArtMethod* and this*)
- bne $t5, $t3, 1f # if (gpr_index == 2)
- addiu $t5, 1
- LOAD_WORD_TO_REG a2, t0, t3, loop # a2 = current argument, gpr_index++
-1: bne $t5, $t3, loop # else if (gpr_index == 3)
- nop
- LOAD_WORD_TO_REG a3, t0, t3, loop # a3 = current argument, gpr_index++
+ addu $ra, $t2, $t6
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
isLong:
- addiu $t0, 8 # next_arg = curr_arg + 8
- slti $t5, $t3, 3
- beqz $t5, 2f # if (gpr_index < 3)
- nop
- LOAD_LONG_TO_REG a2, a3, t0, t3, loop # a2_a3 = curr_arg, gpr_index = 4
-2: b loop # else
- li $t3, 4 # gpr_index = 4
-
-isDouble:
- addiu $t0, 8 # next_arg = curr_arg + 8
- li $t5, 0
- bne $t5, $t4, 3f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loop # f12_f13 = curr_arg, fp_index++
-3: bne $t5, $t4, loop # else if (fp_index == 1)
- nop
- LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loop # f14_f15 = curr_arg, fp_index++
+ addu $ra, $t3, $t6
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
isSingle:
- li $t5, 0
- bne $t5, $t4, 4f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_FLOAT_TO_REG f12, t0, t4, loop # f12 = curr_arg, fp_index++
-4: bne $t5, $t4, loop # else if (fp_index == 1)
- nop
- LOAD_FLOAT_TO_REG f14, t0, t4, loop # f14 = curr_arg, fp_index++
+ addu $ra, $t4, $t7
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
+
+isDouble:
+ addu $ra, $t5, $t7
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+ addiu $t7, 16 # fp_index += 16 (this update did not fit in LOAD_DOUBLE_TO_REG)
+#endif
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
loopEnd:
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
@@ -976,6 +1023,38 @@
SDu $f0, $f1, 0, $t0, $t1 # store floating point result
jalr $zero, $ra
nop
+
+ // Note that gpr_index is kept within the range of tabInt and tabLong
+ // and fp_index is kept within the range of tabSingle and tabDouble.
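+ // Longs need an even/odd GPR pair, so tabLong duplicates the t0_t1
+ // entry: a long arriving at A3 or at T0 is placed in t0_t1 either way.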
+ .balign 16
+tabInt:
+ LOAD_WORD_TO_REG a2, t8, t6, loop # a2 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a3, t8, t6, loop # a3 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t0, t8, t6, loop # t0 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t1, t8, t6, loop # t1 = current argument, gpr_index += 16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+tabLong:
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 2*16, loop # a2_a3 = curr_arg, gpr_index = 2*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+tabSingle:
+ LOAD_FLOAT_TO_REG f8, t8, t7, loop # f8 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f10, t8, t7, loop # f10 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f12, t8, t7, loop # f12 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f14, t8, t7, loop # f14 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f16, t8, t7, loop # f16 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f18, t8, t7, loop # f18 = curr_arg, fp_index += 16
+ LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16
+tabDouble:
+ LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loop # f8_f9 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loop # f10_f11 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loop # f12_f13 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loop # f14_f15 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loop # f16_f17 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loop # f18_f19 = curr_arg; if FPU32, fp_index += 16
+ LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16
END art_quick_invoke_stub
/*
@@ -1016,64 +1095,63 @@
addiu $sp, $sp, 16 # restore stack after memcpy
lw $gp, 16($fp) # restore $gp
lw $a0, SPILL_SIZE($fp) # restore ArtMethod*
- addiu $t0, $sp, 4 # t0 = pointer to the current argument (skip ArtMethod*)
- li $t3, 1 # t3 = gpr_index = 1 (skip A0)
- move $t4, $zero # t4 = fp_index = 0
- lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+ addiu $t8, $sp, 4 # t8 = pointer to the current argument (skip ArtMethod*)
+ li $t6, 0 # t6 = gpr_index = 0 (corresponds to A1; A0 is skipped)
+ li $t7, 0 # t7 = fp_index = 0
+ lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
# as the $fp is SPILL_SIZE bytes below the $sp on entry)
- addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+ addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type)
+
+ // Load the base addresses of tabIntS ... tabDoubleS.
+ // We will use the register indices (gpr_index, fp_index) to branch.
+ // Note that the indices are scaled by 16, so they can be added to the bases directly.
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+ lapc $t2, tabIntS
+ lapc $t3, tabLongS
+ lapc $t4, tabSingleS
+ lapc $t5, tabDoubleS
+#else
+ bltzal $zero, tabBaseS # nal
+ addiu $t2, $ra, %lo(tabIntS - tabBaseS)
+tabBaseS:
+ addiu $t3, $ra, %lo(tabLongS - tabBaseS)
+ addiu $t4, $ra, %lo(tabSingleS - tabBaseS)
+ addiu $t5, $ra, %lo(tabDoubleS - tabBaseS)
+#endif
+
loopS:
- lbu $t2, 0($t1) # t2 = shorty[i]
- beqz $t2, loopEndS # finish getting args when shorty[i] == '\0'
- addiu $t1, 1
+ lbu $ra, 0($t9) # ra = shorty[i]
+ beqz $ra, loopEndS # finish getting args when shorty[i] == '\0'
+ addiu $t9, 1
- li $t9, 'J' # put char 'J' into t9
- beq $t9, $t2, isLongS # branch if result type char == 'J'
- li $t9, 'D' # put char 'D' into t9
- beq $t9, $t2, isDoubleS # branch if result type char == 'D'
- li $t9, 'F' # put char 'F' into t9
- beq $t9, $t2, isSingleS # branch if result type char == 'F'
- addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
- # for both, int and single)
+ addiu $ra, -'J'
+ beqz $ra, isLongS # branch if arg type char == 'J'
+ addiu $ra, 'J' - 'D'
+ beqz $ra, isDoubleS # branch if arg type char == 'D'
+ addiu $ra, 'D' - 'F'
+ beqz $ra, isSingleS # branch if arg type char == 'F'
- li $t5, 1 # skip a0 (ArtMethod*)
- bne $t5, $t3, 1f # if (gpr_index == 1)
- addiu $t5, 1
- LOAD_WORD_TO_REG a1, t0, t3, loopS # a1 = current argument, gpr_index++
-1: bne $t5, $t3, 2f # else if (gpr_index == 2)
- addiu $t5, 1
- LOAD_WORD_TO_REG a2, t0, t3, loopS # a2 = current argument, gpr_index++
-2: bne $t5, $t3, loopS # else if (gpr_index == 3)
- nop
- LOAD_WORD_TO_REG a3, t0, t3, loopS # a3 = current argument, gpr_index++
+ addu $ra, $t2, $t6
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
isLongS:
- addiu $t0, 8 # next_arg = curr_arg + 8
- slti $t5, $t3, 3
- beqz $t5, 3f # if (gpr_index < 3)
- nop
- LOAD_LONG_TO_REG a2, a3, t0, t3, loopS # a2_a3 = curr_arg, gpr_index = 4
-3: b loopS # else
- li $t3, 4 # gpr_index = 4
-
-isDoubleS:
- addiu $t0, 8 # next_arg = curr_arg + 8
- li $t5, 0
- bne $t5, $t4, 4f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loopS # f12_f13 = curr_arg, fp_index++
-4: bne $t5, $t4, loopS # else if (fp_index == 1)
- nop
- LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loopS # f14_f15 = curr_arg, fp_index++
+ addu $ra, $t3, $t6
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
isSingleS:
- li $t5, 0
- bne $t5, $t4, 5f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_FLOAT_TO_REG f12, t0, t4, loopS # f12 = curr_arg, fp_index++
-5: bne $t5, $t4, loopS # else if (fp_index == 1)
- nop
- LOAD_FLOAT_TO_REG f14, t0, t4, loopS # f14 = curr_arg, fp_index++
+ addu $ra, $t4, $t7
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
+
+isDoubleS:
+ addu $ra, $t5, $t7
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+ addiu $t7, 16 # fp_index += 16 (this update did not fit in LOAD_DOUBLE_TO_REG)
+#endif
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
loopEndS:
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
@@ -1104,6 +1182,40 @@
SDu $f0, $f1, 0, $t0, $t1 # store floating point result
jalr $zero, $ra
nop
+
+ // Note that gpr_index is kept within the range of tabIntS and tabLongS
+ // and fp_index is kept within the range of tabSingleS and tabDoubleS.
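+ // As in tabLong above, the tabLongS entries are duplicated so that a
+ // long reaching either register of a pair is aligned to a2_a3 or t0_t1.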
+ .balign 16
+tabIntS:
+ LOAD_WORD_TO_REG a1, t8, t6, loopS # a1 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a2, t8, t6, loopS # a2 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a3, t8, t6, loopS # a3 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t0, t8, t6, loopS # t0 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t1, t8, t6, loopS # t1 = current argument, gpr_index += 16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+tabLongS:
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+tabSingleS:
+ LOAD_FLOAT_TO_REG f8, t8, t7, loopS # f8 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f10, t8, t7, loopS # f10 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f12, t8, t7, loopS # f12 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f14, t8, t7, loopS # f14 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f16, t8, t7, loopS # f16 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f18, t8, t7, loopS # f18 = curr_arg, fp_index += 16
+ LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16
+tabDoubleS:
+ LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loopS # f8_f9 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loopS # f10_f11 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loopS # f12_f13 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loopS # f14_f15 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loopS # f16_f17 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loopS # f18_f19 = curr_arg; if FPU32, fp_index += 16
+ LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16
END art_quick_invoke_static_stub
#undef SPILL_SIZE
@@ -1886,9 +1998,9 @@
la $t9, artQuickProxyInvokeHandler
jalr $t9 # (Method* proxy method, receiver, Thread*, SP)
addiu $a3, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots)
- lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+ lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
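+ # ($t0 is reloaded by RESTORE_SAVE_REFS_AND_ARGS_FRAME below, so the
+ # exception flag must be kept in a register the restore does not touch)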
RESTORE_SAVE_REFS_AND_ARGS_FRAME
- bnez $t0, 1f
+ bnez $t7, 1f
# don't care if $v0 and/or $v1 are modified, when exception branch taken
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra
@@ -1900,25 +2012,25 @@
/*
* Called to resolve an imt conflict.
* a0 is the conflict ArtMethod.
- * t0 is a hidden argument that holds the target interface method's dex method index.
+ * t7 is a hidden argument that holds the target interface method's dex method index.
*
- * Note that this stub writes to a0, t0 and t1.
+ * Note that this stub writes to a0, t7 and t8.
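+ * ($t0 and $t1 now carry method arguments, which is why the hidden
+ * argument moved from $t0 to $t7.)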
*/
ENTRY art_quick_imt_conflict_trampoline
- lw $t1, 0($sp) # Load referrer.
- lw $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array.
- sll $t0, $t0, POINTER_SIZE_SHIFT # Calculate offset.
- addu $t0, $t1, $t0 # Add offset to base.
- lw $t0, 0($t0) # Load interface method.
+ lw $t8, 0($sp) # Load referrer.
+ lw $t8, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t8) # Load dex cache methods array.
+ sll $t7, $t7, POINTER_SIZE_SHIFT # Calculate offset.
+ addu $t7, $t8, $t7 # Add offset to base.
+ lw $t7, 0($t7) # Load interface method.
lw $a0, ART_METHOD_JNI_OFFSET_32($a0) # Load ImtConflictTable.
.Limt_table_iterate:
- lw $t1, 0($a0) # Load next entry in ImtConflictTable.
+ lw $t8, 0($a0) # Load next entry in ImtConflictTable.
# Branch if found.
- beq $t1, $t0, .Limt_table_found
+ beq $t8, $t7, .Limt_table_found
nop
# If the entry is null, the interface method is not in the ImtConflictTable.
- beqz $t1, .Lconflict_trampoline
+ beqz $t8, .Lconflict_trampoline
nop
# Iterate over the entries of the ImtConflictTable.
b .Limt_table_iterate
@@ -1928,7 +2040,7 @@
# We successfully hit an entry in the table. Load the target method and jump to it.
lw $a0, __SIZEOF_POINTER__($a0)
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
- jr $t9
+ jalr $zero, $t9
nop
.Lconflict_trampoline:
@@ -1972,7 +2084,7 @@
# The result of the call is:
# v0: ptr to native code, 0 on error.
# v1: ptr to the bottom of the used area of the alloca, can restore stack till here.
- beq $v0, $zero, 1f # check entry error
+ beq $v0, $zero, 2f # check entry error
move $t9, $v0 # save the code ptr
move $sp, $v1 # release part of the alloca
@@ -1980,10 +2092,22 @@
lw $a0, 0($sp)
lw $a1, 4($sp)
lw $a2, 8($sp)
-
- # Load FPRs the same as GPRs. Look at BuildNativeCallFrameStateMachine.
- jalr $t9 # native call
lw $a3, 12($sp)
+
+ # artQuickGenericJniTrampoline sets bit 0 of the native code address to 1
+ # when the first two arguments are both single precision floats. This lets
+ # us extract them properly from the stack and load into floating point
+ # registers.
+ MTD $a0, $a1, $f12, $f13
+ andi $t0, $t9, 1
+ xor $t9, $t9, $t0
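+ # ($t0 holds the tag bit; the xor above clears it, recovering the
+ # real code address. The mtc1 below executes in the branch delay
+ # slot: if the tag was set it loads the second float into $f14;
+ # otherwise the following MTD overwrites $f14/$f15 anyway.)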
+ bnez $t0, 1f
+ mtc1 $a1, $f14
+ MTD $a2, $a3, $f14, $f15
+
+1:
+ jalr $t9 # native call
+ nop
addiu $sp, $sp, 16 # remove arg slots
move $gp, $s3 # restore $gp from $s3
@@ -1999,18 +2123,18 @@
s.d $f0, 16($sp) # pass result_f
lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
- bne $t0, $zero, 1f # check for pending exceptions
+ bne $t0, $zero, 2f # check for pending exceptions
move $sp, $s8 # tear down the alloca
- # tear dpown the callee-save frame
+ # tear down the callee-save frame
RESTORE_SAVE_REFS_AND_ARGS_FRAME
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra
nop
-1:
+2:
lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
# This will create a new save-all frame, required by the runtime.
DELIVER_PENDING_EXCEPTION
@@ -2023,9 +2147,9 @@
la $t9, artQuickToInterpreterBridge
jalr $t9 # (Method* method, Thread*, SP)
addiu $a2, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots)
- lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+ lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
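+ # (as in the proxy invoke handler above, $t7 survives the frame
+ # restore below while $t0 does not)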
RESTORE_SAVE_REFS_AND_ARGS_FRAME
- bnez $t0, 1f
+ bnez $t7, 1f
# don't care if $v0 and/or $v1 are modified, when exception branch taken
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra