Changes to remove the need for compiled invoke stubs for quick.

The ARM, x86, and MIPS implementations are complete, though MIPS is untested.

ArgArray now holds the arguments as a uint32_t array instead of a JValue
array. A separate result slot for float/double returns was also needed for
x86/MIPS. The compiled invoke stubs are still present for now, but are only
used by portable.
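
For illustration only, here is a minimal, self-contained C++ sketch of the
convention described above: arguments flattened into 32-bit words (with
long/double taking two consecutive words) and two result slots, one for
integer/reference returns and one for float/double returns. The PackedArgs
and FakeJValue names are hypothetical stand-ins, not the real ArgArray and
JValue types, and the real stub signature is not shown.

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    // Hypothetical stand-in for ART's JValue union of return types.
    union FakeJValue {
      int32_t i;
      int64_t j;
      float f;
      double d;
    };

    // Hypothetical helper that flattens arguments into 32-bit words, the
    // layout the quick stub now consumes; 64-bit values use two words.
    class PackedArgs {
     public:
      void AppendWord(uint32_t value) { words_.push_back(value); }
      void AppendDouble(double value) {
        uint64_t bits;
        std::memcpy(&bits, &value, sizeof(bits));
        words_.push_back(static_cast<uint32_t>(bits));
        words_.push_back(static_cast<uint32_t>(bits >> 32));
      }
      const uint32_t* data() const { return words_.data(); }
      uint32_t size_in_bytes() const {
        return static_cast<uint32_t>(words_.size() * sizeof(uint32_t));
      }

     private:
      std::vector<uint32_t> words_;
    };

    int main() {
      PackedArgs args;
      args.AppendWord(42);        // int argument: one word
      args.AppendDouble(3.5);     // double argument: two words

      FakeJValue result{};        // integer/reference returns ($v0/$v1 on MIPS)
      FakeJValue float_result{};  // float/double returns ($f0 on MIPS)
      float_result.d = 3.5;       // pretend the callee returned a double

      char return_shorty = 'D';   // 'D' double, 'F' float, otherwise integral
      double value = (return_shorty == 'D') ? float_result.d
                   : (return_shorty == 'F') ? float_result.f
                   : static_cast<double>(result.i);
      std::cout << args.size_in_bytes() << "-byte arg array, result " << value
                << "\n";
      return 0;
    }

The MIPS stub in the diff below simply memcpy's this word array onto the
callee's outgoing frame, which is why a raw uint32_t layout is more
convenient than one JValue per argument.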

Change-Id: I0647f8d5d420cea61370e662e85bdc0c13b5e378
diff --git a/src/oat/runtime/mips/runtime_support_mips.S b/src/oat/runtime/mips/runtime_support_mips.S
index 56535b2..cc41d14 100644
--- a/src/oat/runtime/mips/runtime_support_mips.S
+++ b/src/oat/runtime/mips/runtime_support_mips.S
@@ -427,6 +427,63 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
+     * Invocation stub.
+     * On entry:
+     *   a0 = method pointer
+     *   a1 = argument array or NULL for no argument methods
+     *   a2 = size of argument array in bytes
+     *   a3 = (managed) thread pointer
+     *   [sp + 16] = JValue* result for non-floating point returns
+     *   [sp + 20] = JValue* result for floating point returns
+     */
+ENTRY art_quick_invoke_stub
+    GENERATE_GLOBAL_POINTER
+    sw    $a0, 0($sp)           # save out a0
+    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
+    .cfi_adjust_cfa_offset 16
+    sw    $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw    $fp, 8($sp)
+    .cfi_rel_offset 30, 8
+    sw    $s1, 4($sp)
+    .cfi_rel_offset 17, 4
+    sw    $s0, 0($sp)
+    .cfi_rel_offset 16, 0
+    move  $fp, $sp              # save sp in fp
+    .cfi_def_cfa_register 30
+    move  $s1, $a3              # move managed thread pointer into s1
+    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
+    addiu $t0, $a2, 16          # create space for method pointer in frame
+    srl   $t0, $t0, 3           # shift the frame size right 3
+    sll   $t0, $t0, 3           # clear low 3 bits with the srl above, aligning frame size to 8 bytes
+    subu  $sp, $sp, $t0         # reserve stack space for argument array
+    addiu $a0, $sp, 4           # memcpy dest: sp + 4, just past the method* slot
+    jal   memcpy                # (dest, src, bytes)
+    addiu $sp, $sp, -16         # make space for argument slots for memcpy (branch delay slot)
+    addiu $sp, $sp, 16          # restore stack after memcpy
+    lw    $a0, 16($fp)          # restore method*
+    lw    $a1, 4($sp)           # copy arg value for a1
+    lw    $a2, 8($sp)           # copy arg value for a2
+    lw    $a3, 12($sp)          # copy arg value for a3
+    lw    $t9, METHOD_CODE_OFFSET($a0)  # get pointer to the code
+    jalr  $t9                   # call the method
+    sw    $zero, 0($sp)         # store NULL for method* at bottom of frame (branch delay slot)
+    move  $sp, $fp              # restore the stack
+    lw    $s0, 0($sp)           # restore saved s0
+    lw    $s1, 4($sp)           # restore saved s1
+    lw    $fp, 8($sp)           # restore saved fp
+    lw    $ra, 12($sp)          # restore saved ra
+    addiu $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    lw    $t0, 16($sp)          # get result pointer
+    sw    $v0, 0($t0)           # store the result
+    sw    $v1, 4($t0)           # store the other half of the result
+    lw    $t0, 20($sp)          # get floating point result pointer
+    jr    $ra
+    s.d   $f0, 0($t0)           # store floating point result (branch delay slot)
+END art_quick_invoke_stub
+
+    /*
      * Entry point of native methods when JNI bug compatibility is enabled.
      */
     .extern artWorkAroundAppJniBugs