Revert "Revert "[MIPS] Use hard float calling convention for managed code""
This reverts commit 7fee84c087e0f903e7d43bef180df047db1c8051.
Fixed an issue with temporary registers on MIPS32R6.
Change-Id: I93018927e6a6036cff2d55e6cda66d3212a4316b
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4d5004f..8bc75e5 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -170,45 +170,47 @@
- * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 16 total + 3 words padding + method*
*/
.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
- addiu $sp, $sp, -64
- .cfi_adjust_cfa_offset 64
+ addiu $sp, $sp, -80
+ .cfi_adjust_cfa_offset 80
// Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 64)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 80)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS) size not as expected."
#endif
- sw $ra, 60($sp)
- .cfi_rel_offset 31, 60
- sw $s8, 56($sp)
- .cfi_rel_offset 30, 56
- sw $gp, 52($sp)
- .cfi_rel_offset 28, 52
- sw $s7, 48($sp)
- .cfi_rel_offset 23, 48
- sw $s6, 44($sp)
- .cfi_rel_offset 22, 44
- sw $s5, 40($sp)
- .cfi_rel_offset 21, 40
- sw $s4, 36($sp)
- .cfi_rel_offset 20, 36
- sw $s3, 32($sp)
- .cfi_rel_offset 19, 32
- sw $s2, 28($sp)
- .cfi_rel_offset 18, 28
- sw $a3, 24($sp)
- .cfi_rel_offset 7, 24
- sw $a2, 20($sp)
- .cfi_rel_offset 6, 20
- sw $a1, 16($sp)
- .cfi_rel_offset 5, 16
+ sw $ra, 76($sp)
+ .cfi_rel_offset 31, 76
+ sw $s8, 72($sp)
+ .cfi_rel_offset 30, 72
+ sw $gp, 68($sp)
+ .cfi_rel_offset 28, 68
+ sw $s7, 64($sp)
+ .cfi_rel_offset 23, 64
+ sw $s6, 60($sp)
+ .cfi_rel_offset 22, 60
+ sw $s5, 56($sp)
+ .cfi_rel_offset 21, 56
+ sw $s4, 52($sp)
+ .cfi_rel_offset 20, 52
+ sw $s3, 48($sp)
+ .cfi_rel_offset 19, 48
+ sw $s2, 44($sp)
+ .cfi_rel_offset 18, 44
+ sw $a3, 40($sp)
+ .cfi_rel_offset 7, 40
+ sw $a2, 36($sp)
+ .cfi_rel_offset 6, 36
+ sw $a1, 32($sp)
+ .cfi_rel_offset 5, 32
+ SDu $f14, $f15, 24, $sp, $t0 # save $f14/$f15 as one 64-bit value ($t0 is scratch for SDu)
+ SDu $f12, $f13, 16, $sp, $t0
# bottom will hold Method*
.endm
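
For reference, the 80-byte frame built by the macro above can be pictured as the
following C++ struct (an illustrative sketch, not code from the tree; the field
names are invented, and the offsets mirror the sw/SDu instructions):

    #include <cstdint>

    struct RefsAndArgsFrameMips {       // $sp points here after the addiu
      uint32_t method;                  //  0: ArtMethod*, stored by callers of the macro
      uint32_t padding[3];              //  4, 8, 12: padding words
      uint64_t f12_f13;                 // 16: SDu $f12, $f13 (FP args)
      uint64_t f14_f15;                 // 24: SDu $f14, $f15 (FP args)
      uint32_t a1, a2, a3;              // 32, 36, 40: GPR args
      uint32_t s2, s3, s4, s5, s6, s7;  // 44..64: callee saves
      uint32_t gp;                      // 68
      uint32_t s8;                      // 72
      uint32_t ra;                      // 76
    };
    static_assert(sizeof(RefsAndArgsFrameMips) == 80,
                  "must match FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE");
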
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 16 total + 3 words padding + method*
* Clobbers $t0 and $sp
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
* Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
@@ -227,7 +229,7 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 16 total + 3 words padding + method*
* Clobbers $sp
* Use $a0 as the Method* and loads it into bottom of stack.
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
@@ -244,32 +246,34 @@
.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack
.cfi_adjust_cfa_offset -ARG_SLOT_SIZE
- lw $ra, 60($sp)
+ lw $ra, 76($sp)
.cfi_restore 31
- lw $s8, 56($sp)
+ lw $s8, 72($sp)
.cfi_restore 30
- lw $gp, 52($sp)
+ lw $gp, 68($sp)
.cfi_restore 28
- lw $s7, 48($sp)
+ lw $s7, 64($sp)
.cfi_restore 23
- lw $s6, 44($sp)
+ lw $s6, 60($sp)
.cfi_restore 22
- lw $s5, 40($sp)
+ lw $s5, 56($sp)
.cfi_restore 21
- lw $s4, 36($sp)
+ lw $s4, 52($sp)
.cfi_restore 20
- lw $s3, 32($sp)
+ lw $s3, 48($sp)
.cfi_restore 19
- lw $s2, 28($sp)
+ lw $s2, 44($sp)
.cfi_restore 18
- lw $a3, 24($sp)
+ lw $a3, 40($sp)
.cfi_restore 7
- lw $a2, 20($sp)
+ lw $a2, 36($sp)
.cfi_restore 6
- lw $a1, 16($sp)
+ lw $a1, 32($sp)
.cfi_restore 5
- addiu $sp, $sp, 64 # pop frame
- .cfi_adjust_cfa_offset -64
+ LDu $f14, $f15, 24, $sp, $t1 # restore $f14/$f15 as one 64-bit value ($t1 is scratch for LDu)
+ LDu $f12, $f13, 16, $sp, $t1
+ addiu $sp, $sp, 80 # pop frame
+ .cfi_adjust_cfa_offset -80
.endm
/*
@@ -484,6 +488,32 @@
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
+.macro LOAD_WORD_TO_REG reg, next_arg, index, label
+ lw $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
+ b \label
+ addiu $\index, 1
+.endm
+
+.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index, label
+ lw $\reg1, -8($\next_arg) # next_arg points to argument after the current one (offset is 8)
+ lw $\reg2, -4($\next_arg)
+ b \label
+ li $\index, 4 # a long can only be loaded into the a2_a3 pair, so the index will always be 4
+.endm
+
+.macro LOAD_FLOAT_TO_REG reg, next_arg, index, label
+ lwc1 $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
+ b \label
+ addiu $\index, 1
+.endm
+
+.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index, tmp, label
+ LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one
+ # (offset is 8)
+ b \label
+ addiu $\index, 1
+.endm
+
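
Each of these macros ends with a branch whose delay slot performs the index
update, so the update executes before control reaches \label. A hedged C++
equivalent of LOAD_WORD_TO_REG (function and parameter names invented):

    #include <cstdint>
    #include <cstring>

    // next_arg already points one word past the current argument,
    // hence the -4 offset in the load.
    uint32_t LoadWordToReg(const uint8_t* next_arg, int* gpr_index) {
      uint32_t value;
      std::memcpy(&value, next_arg - 4, sizeof(value));  // lw $reg, -4($next_arg)
      ++*gpr_index;                                      // addiu in the branch delay slot
      return value;                                      // b \label
    }
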
/*
* Invocation stub for quick code.
* On entry:
@@ -510,21 +540,76 @@
.cfi_def_cfa_register 30
move $s1, $a3 # move managed thread pointer into s1
addiu $s0, $zero, SUSPEND_CHECK_INTERVAL # reset s0 to suspend check interval
- addiu $t0, $a2, 4 # create space for method pointer in frame.
+ addiu $t0, $a2, 4 # create space for ArtMethod* in frame.
subu $t0, $sp, $t0 # reserve & align *stack* to 16 bytes:
- srl $t0, $t0, 4 # native calling convention only aligns to 8B,
- sll $sp, $t0, 4 # so we have to ensure ART 16B alignment ourselves.
- addiu $a0, $sp, 4 # pass stack pointer + method ptr as dest for memcpy
+ srl $t0, $t0, 4 # native calling convention only aligns to 8B,
+ sll $sp, $t0, 4 # so we have to ensure ART 16B alignment ourselves.
+ addiu $a0, $sp, 4 # pass stack pointer + ArtMethod* as dest for memcpy
jal memcpy # (dest, src, bytes)
addiu $sp, $sp, -16 # make space for argument slots for memcpy
addiu $sp, $sp, 16 # restore stack after memcpy
- lw $a0, 16($fp) # restore method*
- lw $a1, 4($sp) # copy arg value for a1
- lw $a2, 8($sp) # copy arg value for a2
- lw $a3, 12($sp) # copy arg value for a3
+ lw $a0, 16($fp) # restore ArtMethod*
+ lw $a1, 4($sp) # a1 = this*
+ addiu $t0, $sp, 8 # t0 = pointer to the current argument (skip ArtMethod* and this*)
+ li $t3, 2 # t3 = gpr_index = 2 (skip A0 and A1)
+ move $t4, $zero # t4 = fp_index = 0
+ lw $t1, 20+16($fp) # get shorty (20 is the offset from $sp on entry; add 16
+ # because $fp is 16 bytes below $sp on entry)
+ addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+loop:
+ lbu $t2, 0($t1) # t2 = shorty[i]
+ beqz $t2, loopEnd # finish getting args when shorty[i] == '\0'
+ addiu $t1, 1
+
+ li $t9, 'J' # put char 'J' into t9
+ beq $t9, $t2, isLong # branch if current arg type char == 'J'
+ li $t9, 'D' # put char 'D' into t9
+ beq $t9, $t2, isDouble # branch if current arg type char == 'D'
+ li $t9, 'F' # put char 'F' into t9
+ beq $t9, $t2, isSingle # branch if current arg type char == 'F'
+ addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
+ # for both int and single)
+
+ li $t5, 2 # t5 = 2: a0 and a1 are taken (ArtMethod* and this*)
+ bne $t5, $t3, 1f # if (gpr_index == 2)
+ addiu $t5, 1
+ LOAD_WORD_TO_REG a2, t0, t3, loop # a2 = current argument, gpr_index++
+1: bne $t5, $t3, loop # else if (gpr_index == 3)
+ nop
+ LOAD_WORD_TO_REG a3, t0, t3, loop # a3 = current argument, gpr_index++
+
+isLong:
+ addiu $t0, 8 # next_arg = curr_arg + 8
+ slti $t5, $t3, 3
+ beqz $t5, 2f # if (gpr_index < 3)
+ nop
+ LOAD_LONG_TO_REG a2, a3, t0, t3, loop # a2_a3 = curr_arg, gpr_index = 4
+2: b loop # else
+ li $t3, 4 # gpr_index = 4
+
+isDouble:
+ addiu $t0, 8 # next_arg = curr_arg + 8
+ li $t5, 0
+ bne $t5, $t4, 3f # if (fp_index == 0)
+ addiu $t5, 1
+ LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loop # f12_f13 = curr_arg, fp_index++
+3: bne $t5, $t4, loop # else if (fp_index == 1)
+ nop
+ LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loop # f14_f15 = curr_arg, fp_index++
+
+isSingle:
+ li $t5, 0
+ bne $t5, $t4, 4f # if (fp_index == 0)
+ addiu $t5, 1
+ LOAD_FLOAT_TO_REG f12, t0, t4, loop # f12 = curr_arg, fp_index++
+4: bne $t5, $t4, loop # else if (fp_index == 1)
+ nop
+ LOAD_FLOAT_TO_REG f14, t0, t4, loop # f14 = curr_arg, fp_index++
+
+loopEnd:
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
jalr $t9 # call the method
- sw $zero, 0($sp) # store null for method* at bottom of frame
+ sw $zero, 0($sp) # store null for ArtMethod* at bottom of frame
move $sp, $fp # restore the stack
lw $s0, 0($sp)
.cfi_restore 16
@@ -539,20 +624,145 @@
lw $t0, 16($sp) # get result pointer
lw $t1, 20($sp) # get shorty
lb $t1, 0($t1) # get result type char
- li $t2, 68 # put char 'D' into t2
- beq $t1, $t2, 1f # branch if result type char == 'D'
- li $t3, 70 # put char 'F' into t3
- beq $t1, $t3, 1f # branch if result type char == 'F'
+ li $t2, 'D' # put char 'D' into t2
+ beq $t1, $t2, 5f # branch if result type char == 'D'
+ li $t3, 'F' # put char 'F' into t3
+ beq $t1, $t3, 5f # branch if result type char == 'F'
sw $v0, 0($t0) # store the result
jalr $zero, $ra
sw $v1, 4($t0) # store the other half of the result
-1:
+5:
SDu $f0, $f1, 0, $t0, $t1 # store floating point result
jalr $zero, $ra
nop
END art_quick_invoke_stub
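
The code between loop: and loopEnd: walks the shorty and fills the remaining
argument registers. A hedged C++ sketch of that control flow (names are
invented; the struct fields stand in for $a2/$a3 and $f12-$f15):

    #include <cstdint>
    #include <cstring>

    struct OutRegs {                      // stand-ins for the real registers
      uint32_t a2 = 0, a3 = 0;            // remaining integer argument registers
      uint64_t f12_f13 = 0, f14_f15 = 0;  // double args (raw bits)
      uint32_t f12 = 0, f14 = 0;          // float args (raw bits)
    };

    // shorty[0] is the return type, so parsing of the argument characters
    // starts at shorty + 1. Arguments that no longer fit in a register are
    // left in the stack copy made by the memcpy above.
    void MarshalArgs(const char* shorty, const uint8_t* arg, OutRegs* out) {
      int gpr_index = 2;                  // a0 = ArtMethod*, a1 = this*
      int fp_index = 0;
      for (const char* s = shorty + 1; *s != '\0'; ++s) {
        switch (*s) {
          case 'J':                       // long: only the a2_a3 pair fits
            if (gpr_index < 3) {
              std::memcpy(&out->a2, arg, 4);
              std::memcpy(&out->a3, arg + 4, 4);
            }
            gpr_index = 4;                // li $\index, 4
            arg += 8;
            break;
          case 'D':                       // double: f12/f13 first, then f14/f15
            if (fp_index == 0)      std::memcpy(&out->f12_f13, arg, 8);
            else if (fp_index == 1) std::memcpy(&out->f14_f15, arg, 8);
            if (fp_index < 2) ++fp_index;
            arg += 8;
            break;
          case 'F':                       // float: f12 first, then f14
            if (fp_index == 0)      std::memcpy(&out->f12, arg, 4);
            else if (fp_index == 1) std::memcpy(&out->f14, arg, 4);
            if (fp_index < 2) ++fp_index;
            arg += 4;
            break;
          default:                        // ints and references: a2, then a3
            if (gpr_index == 2)      std::memcpy(&out->a2, arg, 4);
            else if (gpr_index == 3) std::memcpy(&out->a3, arg, 4);
            if (gpr_index < 4) ++gpr_index;
            arg += 4;
            break;
        }
      }
    }
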
/*
+ * Invocation stub for quick code (static methods).
+ * On entry:
+ * a0 = method pointer
+ * a1 = argument array or null for no-argument methods
+ * a2 = size of argument array in bytes
+ * a3 = (managed) thread pointer
+ * [sp + 16] = JValue* result
+ * [sp + 20] = shorty
+ */
+ENTRY art_quick_invoke_static_stub
+ sw $a0, 0($sp) # save out a0
+ addiu $sp, $sp, -16 # spill s0, s1, fp, ra
+ .cfi_adjust_cfa_offset 16
+ sw $ra, 12($sp)
+ .cfi_rel_offset 31, 12
+ sw $fp, 8($sp)
+ .cfi_rel_offset 30, 8
+ sw $s1, 4($sp)
+ .cfi_rel_offset 17, 4
+ sw $s0, 0($sp)
+ .cfi_rel_offset 16, 0
+ move $fp, $sp # save sp in fp
+ .cfi_def_cfa_register 30
+ move $s1, $a3 # move managed thread pointer into s1
+ addiu $s0, $zero, SUSPEND_CHECK_INTERVAL # reset s0 to suspend check interval
+ addiu $t0, $a2, 4 # create space for ArtMethod* in frame.
+ subu $t0, $sp, $t0 # reserve & align *stack* to 16 bytes:
+ srl $t0, $t0, 4 # native calling convention only aligns to 8B,
+ sll $sp, $t0, 4 # so we have to ensure ART 16B alignment ourselves.
+ addiu $a0, $sp, 4 # pass stack pointer + ArtMethod* as dest for memcpy
+ jal memcpy # (dest, src, bytes)
+ addiu $sp, $sp, -16 # make space for argument slots for memcpy
+ addiu $sp, $sp, 16 # restore stack after memcpy
+ lw $a0, 16($fp) # restore ArtMethod*
+ addiu $t0, $sp, 4 # t0 = pointer to the current argument (skip ArtMethod*)
+ li $t3, 1 # t3 = gpr_index = 1 (skip A0)
+ move $t4, $zero # t4 = fp_index = 0
+ lw $t1, 20+16($fp) # get shorty (20 is the offset from $sp on entry; add 16
+ # because $fp is 16 bytes below $sp on entry)
+ addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+loopS:
+ lbu $t2, 0($t1) # t2 = shorty[i]
+ beqz $t2, loopEndS # finish getting args when shorty[i] == '\0'
+ addiu $t1, 1
+
+ li $t9, 'J' # put char 'J' into t9
+ beq $t9, $t2, isLongS # branch if current arg type char == 'J'
+ li $t9, 'D' # put char 'D' into t9
+ beq $t9, $t2, isDoubleS # branch if current arg type char == 'D'
+ li $t9, 'F' # put char 'F' into t9
+ beq $t9, $t2, isSingleS # branch if current arg type char == 'F'
+ addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
+ # for both int and single)
+
+ li $t5, 1 # t5 = 1: a0 is taken (ArtMethod*)
+ bne $t5, $t3, 1f # if (gpr_index == 1)
+ addiu $t5, 1
+ LOAD_WORD_TO_REG a1, t0, t3, loopS # a1 = current argument, gpr_index++
+1: bne $t5, $t3, 2f # else if (gpr_index == 2)
+ addiu $t5, 1
+ LOAD_WORD_TO_REG a2, t0, t3, loopS # a2 = current argument, gpr_index++
+2: bne $t5, $t3, loopS # else if (gpr_index == 3)
+ nop
+ LOAD_WORD_TO_REG a3, t0, t3, loopS # a3 = current argument, gpr_index++
+
+isLongS:
+ addiu $t0, 8 # next_arg = curr_arg + 8
+ slti $t5, $t3, 3
+ beqz $t5, 3f # if (gpr_index < 3)
+ nop
+ LOAD_LONG_TO_REG a2, a3, t0, t3, loopS # a2_a3 = curr_arg, gpr_index = 4
+3: b loopS # else
+ li $t3, 4 # gpr_index = 4
+
+isDoubleS:
+ addiu $t0, 8 # next_arg = curr_arg + 8
+ li $t5, 0
+ bne $t5, $t4, 4f # if (fp_index == 0)
+ addiu $t5, 1
+ LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loopS # f12_f13 = curr_arg, fp_index++
+4: bne $t5, $t4, loopS # else if (fp_index == 1)
+ nop
+ LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loopS # f14_f15 = curr_arg, fp_index++
+
+isSingleS:
+ li $t5, 0
+ bne $t5, $t4, 5f # if (fp_index == 0)
+ addiu $t5, 1
+ LOAD_FLOAT_TO_REG f12, t0, t4, loopS # f12 = curr_arg, fp_index++
+5: bne $t5, $t4, loopS # else if (fp_index == 1)
+ nop
+ LOAD_FLOAT_TO_REG f14, t0, t4, loopS # f14 = curr_arg, fp_index++
+
+loopEndS:
+ lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
+ jalr $t9 # call the method
+ sw $zero, 0($sp) # store null for ArtMethod* at bottom of frame
+ move $sp, $fp # restore the stack
+ lw $s0, 0($sp)
+ .cfi_restore 16
+ lw $s1, 4($sp)
+ .cfi_restore 17
+ lw $fp, 8($sp)
+ .cfi_restore 30
+ lw $ra, 12($sp)
+ .cfi_restore 31
+ addiu $sp, $sp, 16
+ .cfi_adjust_cfa_offset -16
+ lw $t0, 16($sp) # get result pointer
+ lw $t1, 20($sp) # get shorty
+ lb $t1, 0($t1) # get result type char
+ li $t2, 'D' # put char 'D' into t2
+ beq $t1, $t2, 6f # branch if result type char == 'D'
+ li $t3, 'F' # put char 'F' into t3
+ beq $t1, $t3, 6f # branch if result type char == 'F'
+ sw $v0, 0($t0) # store the result
+ jalr $zero, $ra
+ sw $v1, 4($t0) # store the other half of the result
+6:
+ SDu $f0, $f1, 0, $t0, $t1 # store floating point result
+ jalr $zero, $ra
+ nop
+END art_quick_invoke_static_stub
+
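The static stub differs from art_quick_invoke_stub only in its starting state:
there is no this*, so $a1 is free and gpr_index starts at 1. A worked example
for a hypothetical static method, following the loop above:

    // static long f(int i, double d, float g)  -->  shorty "JIDF"
    //   'J' (index 0) return type: comes back in $v0/$v1
    //   'I' gpr_index == 1 -> loaded into $a1, gpr_index = 2
    //   'D' fp_index == 0  -> loaded into $f12/$f13, fp_index = 1
    //   'F' fp_index == 1  -> loaded into $f14, fp_index = 2
    // Any further FP args would be read from the stack copy made by memcpy.
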
+ /*
* Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
* failure.
*/