Upcall support for x86-64.

Sufficient to pass jni_internal_test.

Change-Id: Ia0d9b8241ab8450e04765b9c32eb6dc8fc1a8733
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index d425ed8..444fa22 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,6 +19,13 @@
 
 #include "asm_support.h"
 
+// Offset of field Runtime::callee_save_methods_[kSaveAll]
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 208
+// Offset of field Runtime::callee_save_methods_[kRefsOnly]
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 216
+// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
+#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 224
+
 // Offset of field Thread::self_ verified in InitCpu
 #define THREAD_SELF_OFFSET 72
 // Offset of field Thread::card_table_ verified in InitCpu
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 44bc7a2..ac238f0 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -46,12 +46,63 @@
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
      */
 MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
-    int3
-    int3
+    // R10 := Runtime::Current()
+    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
+    movq (%r10), %r10
+    // Save callee and GPR args, mixed together to agree with core spills bitmap.
+    PUSH r15  // Callee save.
+    PUSH r14  // Callee save.
+    PUSH r13  // Callee save.
+    PUSH r12  // Callee save.
+    PUSH r9   // Arg.
+    PUSH r8   // Arg.
+    PUSH rsi  // Arg.
+    PUSH rbp  // Callee save.
+    PUSH rbx  // Callee save.
+    PUSH rdx  // Arg.
+    PUSH rcx  // Arg.
+    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
+    subq LITERAL(80), %rsp
+    CFI_ADJUST_CFA_OFFSET(80)
+    // R10 := ArtMethod* for ref and args callee save frame method.
+    movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    // Save FPRs.
+    movq %xmm0, 16(%rsp)
+    movq %xmm1, 24(%rsp)
+    movq %xmm2, 32(%rsp)
+    movq %xmm3, 40(%rsp)
+    movq %xmm4, 48(%rsp)
+    movq %xmm5, 56(%rsp)
+    movq %xmm6, 64(%rsp)
+    movq %xmm7, 72(%rsp)
+    // Store ArtMethod* to bottom of stack.
+    movq %r10, 0(%rsp)
 END_MACRO
 
 MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
-    int3
+    // Restore FPRs.
+    movq 16(%rsp), %xmm0
+    movq 24(%rsp), %xmm1
+    movq 32(%rsp), %xmm2
+    movq 40(%rsp), %xmm3
+    movq 48(%rsp), %xmm4
+    movq 56(%rsp), %xmm5
+    movq 64(%rsp), %xmm6
+    movq 72(%rsp), %xmm7
+    addq LITERAL(80), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80)
+    // Save callee and GPR args, mixed together to agree with core spills bitmap.
+    POP rcx
+    POP rdx
+    POP rbx
+    POP rbp
+    POP rsi
+    POP r8
+    POP r9
+    POP r12
+    POP r13
+    POP r14
+    POP r15
 END_MACRO
 
     /*
@@ -147,20 +198,210 @@
 INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
+
+    /*
+     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
+     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
+     * the end of the shorty.
+     */
+MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
+1: // LOOP
+    movb (%r10), %al              // al := *shorty
+    addq LITERAL(1), %r10         // shorty++
+    cmpb LITERAL(0), %al          // if (al == '\0') goto xmm_setup_finished
+    je VAR(finished, 1)
+    cmpb LITERAL(68), %al         // if (al == 'D') goto FOUND_DOUBLE
+    je 2f
+    cmpb LITERAL(70), %al         // if (al == 'F') goto FOUND_FLOAT
+    je 3f
+    addq LITERAL(4), %r11         // arg_array++
+    //  Handle extra space in arg array taken by a long.
+    cmpb LITERAL(74), %al         // if (al != 'J') goto LOOP
+    jne 1b
+    addq LITERAL(4), %r11         // arg_array++
+    jmp 1b                        // goto LOOP
+2:  // FOUND_DOUBLE
+    movsd (%r11), REG_VAR(xmm_reg, 0)
+    addq LITERAL(8), %r11         // arg_array+=2
+    jmp 4f
+3:  // FOUND_FLOAT
+    movss (%r11), REG_VAR(xmm_reg, 0)
+    addq LITERAL(4), %r11         // arg_array++
+4:
+END_MACRO
+
+    /*
+     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
+     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
+     * the end of the shorty.
+     */
+MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
+1: // LOOP
+    movb (%r10), %al              // al := *shorty
+    addq LITERAL(1), %r10         // shorty++
+    cmpb LITERAL(0), %al          // if (al == '\0') goto gpr_setup_finished
+    je  VAR(finished, 2)
+    cmpb LITERAL(74), %al         // if (al == 'J') goto FOUND_LONG
+    je 2f
+    cmpb LITERAL(70), %al         // if (al == 'F') goto SKIP_FLOAT
+    je 3f
+    cmpb LITERAL(68), %al         // if (al == 'D') goto SKIP_DOUBLE
+    je 4f
+    movl (%r11), REG_VAR(gpr_reg32, 1)
+    addq LITERAL(4), %r11         // arg_array++
+    jmp 5f
+2:  // FOUND_LONG
+    movq (%r11), REG_VAR(gpr_reg64, 0)
+    addq LITERAL(8), %r11         // arg_array+=2
+    jmp 5f
+3:  // SKIP_FLOAT
+    addq LITERAL(4), %r11         // arg_array++
+    jmp 1b
+4:  // SKIP_DOUBLE
+    addq LITERAL(8), %r11         // arg_array+=2
+    jmp 1b
+5:
+END_MACRO
+
     /*
      * Quick invocation stub.
      * On entry:
      *   [sp] = return address
      *   rdi = method pointer
-     *   rsi = argument array or NULL for no argument methods
+     *   rsi = argument array that must at least contain the this pointer.
      *   rdx = size of argument array in bytes
      *   rcx = (managed) thread pointer
      *   r8 = JValue* result
      *   r9 = char* shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
-    int3
-    int3
+    // Set up argument XMM registers.
+    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
+    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
+    .balign 16
+.Lxmm_setup_finished:
+    PUSH rbp                      // Save rbp.
+    PUSH r8                       // Save r8/result*.
+    PUSH r9                       // Save r9/shorty*.
+    mov %rsp, %rbp                // Copy value of stack pointer into base pointer.
+    CFI_DEF_CFA_REGISTER(rbp)
+    movl %edx, %r10d
+    addl LITERAL(64), %edx        // Reserve space for return addr, method*, rbp, r8 and r9 in frame.
+    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
+    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
+    subq %rdx, %rsp               // Reserve stack space for argument array.
+    movq LITERAL(0), (%rsp)       // Store NULL for method*
+    movl %r10d, %ecx              // Place size of args in rcx.
+    movq %rdi, %rax               // RAX := method to be called
+    movq %rsi, %r11               // R11 := arg_array
+    leaq 8(%rsp), %rdi            // Rdi is pointing just above the method* in the stack arguments.
+    // Copy arg array into stack.
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
+    movq %rax, %rdi               // RDI := method to be called
+    movl (%r11), %esi             // RSI := this pointer
+    addq LITERAL(4), %r11         // arg_array++
+    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
+    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
+    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
+    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
+.Lgpr_setup_finished:
+    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    movq %rbp, %rsp               // Restore stack pointer.
+    CFI_DEF_CFA_REGISTER(rsp)
+    POP r9                        // Pop r9 - shorty*.
+    POP r8                        // Pop r8 - result*.
+    POP rbp                       // Pop rbp
+    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
+    je .Lreturn_double_quick
+    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
+    je .Lreturn_float_quick
+    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
+    ret
+.Lreturn_double_quick:
+    movsd %xmm0, (%r8)           // Store the double floating point result.
+    ret
+.Lreturn_float_quick:
+    movss %xmm0, (%r8)           // Store the floating point result.
+    ret
+END_FUNCTION art_quick_invoke_stub
+
+    /*
+     * Quick invocation stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = method pointer
+     *   rsi = argument array or NULL if no arguments.
+     *   rdx = size of argument array in bytes
+     *   rcx = (managed) thread pointer
+     *   r8 = JValue* result
+     *   r9 = char* shorty
+     */
+DEFINE_FUNCTION art_quick_invoke_static_stub
+    // Set up argument XMM registers.
+    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
+    movq %rsi, %r11               // R11 := arg_array
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
+    .balign 16
+.Lxmm_setup_finished2:
+    PUSH rbp                      // Save rbp.
+    PUSH r8                       // Save r8/result*.
+    PUSH r9                       // Save r9/shorty*.
+    mov %rsp, %rbp                // Copy value of stack pointer into base pointer.
+    CFI_DEF_CFA_REGISTER(rbp)
+    movl %edx, %r10d
+    addl LITERAL(64), %edx        // Reserve space for return addr, method*, rbp, r8 and r9 in frame.
+    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
+    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
+    subq %rdx, %rsp               // Reserve stack space for argument array.
+    movq LITERAL(0), (%rsp)       // Store NULL for method*
+    movl %r10d, %ecx              // Place size of args in rcx.
+    movq %rdi, %rax               // RAX := method to be called
+    movq %rsi, %r11               // R11 := arg_array
+    leaq 8(%rsp), %rdi            // Rdi is pointing just above the method* in the stack arguments.
+    // Copy arg array into stack.
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
+    movq %rax, %rdi               // RDI := method to be called
+    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
+    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
+.Lgpr_setup_finished2:
+    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    movq %rbp, %rsp               // Restore stack pointer.
+    CFI_DEF_CFA_REGISTER(rsp)
+    POP r9                        // Pop r9 - shorty*.
+    POP r8                        // Pop r8 - result*.
+    POP rbp                       // Pop rbp
+    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
+    je .Lreturn_double_quick2
+    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
+    je .Lreturn_float_quick2
+    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
+    ret
+.Lreturn_double_quick2:
+    movsd %xmm0, (%r8)           // Store the double floating point result.
+    ret
+.Lreturn_float_quick2:
+    movss %xmm0, (%r8)           // Store the floating point result.
+    ret
+END_FUNCTION art_quick_invoke_static_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
@@ -210,8 +451,11 @@
 END_MACRO
 
 MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
-    int3
-    int3
+    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
+    testq %rcx, %rcx               // rcx == 0 ?
+    jnz 1f                         // if rcx != 0 goto 1
+    ret                            // return
+1:                                 // deliver exception on current thread
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
@@ -390,7 +634,22 @@
      */
 UNIMPLEMENTED art_quick_imt_conflict_trampoline
 UNIMPLEMENTED art_quick_resolution_trampoline
-UNIMPLEMENTED art_quick_to_interpreter_bridge
+
+    /*
+     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
+     * of a quick call:
+     * RDI = method being called / to bridge to.
+     * RSI, RDX, RCX, R8, R9 are arguments to that method.
+     */
+DEFINE_FUNCTION art_quick_to_interpreter_bridge
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
+    movq %rsp, %rdx                        // RDX := sp
+    call PLT_SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    movq %rax, %xmm0                   // Place return value also into floating point return value.
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+END_FUNCTION art_quick_to_interpreter_bridge
 
     /*
      * Routine that intercepts method calls and returns.
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
index 9808d91..c1a9942 100644
--- a/runtime/arch/x86_64/registers_x86_64.h
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -48,6 +48,26 @@
 };
 std::ostream& operator<<(std::ostream& os, const Register& rhs);
 
+enum FloatRegister {
+  XMM0 = 0,
+  XMM1 = 1,
+  XMM2 = 2,
+  XMM3 = 3,
+  XMM4 = 4,
+  XMM5 = 5,
+  XMM6 = 6,
+  XMM7 = 7,
+  XMM8 = 8,
+  XMM9 = 9,
+  XMM10 = 10,
+  XMM11 = 11,
+  XMM12 = 12,
+  XMM13 = 13,
+  XMM14 = 14,
+  XMM15 = 15,
+};
+std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs);
+
 }  // namespace x86_64
 }  // namespace art
 
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
index 9e45a72..b74fc5d 100644
--- a/runtime/arch/x86_64/thread_x86_64.cc
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -48,6 +48,12 @@
   CHECK_EQ(self_check, this);
 
   // Sanity check other offsets.
+  CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
+           Runtime::GetCalleeSaveMethodOffset(Runtime::kSaveAll));
+  CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
+           Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsOnly));
+  CHECK_EQ(static_cast<size_t>(RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
+           Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsAndArgs));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
   CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 2a47e46..fe9d7b8 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -96,7 +96,7 @@
 // quasiatomic operations that are performed on partially-overlapping
 // memory.
 class QuasiAtomic {
-#if !defined(__arm__) && !defined(__i386__)
+#if defined(__mips__) && !defined(__LP64__)
   static constexpr bool kNeedSwapMutexes = true;
 #else
   static constexpr bool kNeedSwapMutexes = false;
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 012dabb..b3fce5a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -35,10 +35,14 @@
 
 // Visits the arguments as saved to the stack by a Runtime::kRefAndArgs callee save frame.
 class QuickArgumentVisitor {
- public:
-// Offset to first (not the Method*) argument in a Runtime::kRefAndArgs callee save frame.
-// Size of Runtime::kRefAndArgs callee save frame.
-// Size of Method* and register parameters in out stack arguments.
+  // Size of each spilled GPR.
+#ifdef __LP64__
+  static constexpr size_t kBytesPerGprSpillLocation = 8;
+#else
+  static constexpr size_t kBytesPerGprSpillLocation = 4;
+#endif
+  // Number of bytes for each out register in the caller method's frame.
+  static constexpr size_t kBytesStackArgLocation = 4;
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -53,12 +57,19 @@
   // | R3         |    arg3
   // | R2         |    arg2
   // | R1         |    arg1
-  // | R0         |
+  // | R0         |    padding
   // | Method*    |  <- sp
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 8
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 44
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 48
-#define QUICK_STACK_ARG_SKIP 16
+  static constexpr bool kSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr size_t kNumGprArgs = 3;  // 3 arguments passed in GPRs.
+  static constexpr size_t kNumFprArgs = 0;  // 0 arguments passed in FPRs.
+  static constexpr size_t kBytesPerFprSpillLocation = 4;  // FPR spill size is 4 bytes.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 48;  // Frame size.
+  static size_t GprIndexToGprOffset(uint32_t gpr_index) {
+    return gpr_index * kBytesPerGprSpillLocation;
+  }
 #elif defined(__mips__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -74,10 +85,17 @@
   // | A2         |    arg2
   // | A1         |    arg1
   // | A0/Method* |  <- sp
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 4
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 60
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 64
-#define QUICK_STACK_ARG_SKIP 16
+  static constexpr bool kSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr size_t kNumGprArgs = 3;  // 3 arguments passed in GPRs.
+  static constexpr size_t kNumFprArgs = 0;  // 0 arguments passed in FPRs.
+  static constexpr size_t kBytesPerFprSpillLocation = 4;  // FPR spill size is 4 bytes.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 60;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 64;  // Frame size.
+  static size_t GprIndexToGprOffset(uint32_t gpr_index) {
+    return gpr_index * kBytesPerGprSpillLocation;
+  }
 #elif defined(__i386__)
   // The callee save frame is pointed to by SP.
   // | argN        |  |
@@ -93,49 +111,96 @@
   // | EDX         |    arg2
   // | ECX         |    arg1
   // | EAX/Method* |  <- sp
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 4
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 28
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 32
-#define QUICK_STACK_ARG_SKIP 16
+  static constexpr bool kSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr size_t kNumGprArgs = 3;  // 3 arguments passed in GPRs.
+  static constexpr size_t kNumFprArgs = 0;  // 0 arguments passed in FPRs.
+  static constexpr size_t kBytesPerFprSpillLocation = 8;  // FPR spill size is 8 bytes.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 32;  // Frame size.
+  static size_t GprIndexToGprOffset(uint32_t gpr_index) {
+    return gpr_index * kBytesPerGprSpillLocation;
+  }
 #elif defined(__x86_64__)
-// TODO: implement and check these.
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 8
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 56
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 64
-#define QUICK_STACK_ARG_SKIP 32
+  // The callee save frame is pointed to by SP.
+  // | argN            |  |
+  // | ...             |  |
+  // | reg. arg spills |  |  Caller's frame
+  // | Method*         | ---
+  // | Return          |
+  // | R15             |    callee save
+  // | R14             |    callee save
+  // | R13             |    callee save
+  // | R12             |    callee save
+  // | R9              |    arg5
+  // | R8              |    arg4
+  // | RSI/R6          |    arg1
+  // | RBP/R5          |    callee save
+  // | RBX/R3          |    callee save
+  // | RDX/R2          |    arg2
+  // | RCX/R1          |    arg3
+  // | XMM7            |    float arg 8
+  // | XMM6            |    float arg 7
+  // | XMM5            |    float arg 6
+  // | XMM4            |    float arg 5
+  // | XMM3            |    float arg 4
+  // | XMM2            |    float arg 3
+  // | XMM1            |    float arg 2
+  // | XMM0            |    float arg 1
+  // | Padding         |
+  // | RDI/Method*     |  <- sp
+  static constexpr bool kSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr size_t kNumGprArgs = 5;  // 5 arguments passed in GPRs.
+  static constexpr size_t kNumFprArgs = 8;  // 8 arguments passed in FPRs.
+  static constexpr size_t kBytesPerFprSpillLocation = 8;  // FPR spill size is 8 bytes.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 176;  // Frame size.
+  static size_t GprIndexToGprOffset(uint32_t gpr_index) {
+    switch (gpr_index) {
+      case 0: return (4 * kBytesPerGprSpillLocation);
+      case 1: return (1 * kBytesPerGprSpillLocation);
+      case 2: return (0 * kBytesPerGprSpillLocation);
+      case 3: return (5 * kBytesPerGprSpillLocation);
+      case 4: return (6 * kBytesPerGprSpillLocation);
+      default:
+        LOG(FATAL) << "Unexpected GPR index: " << gpr_index;
+        return 0;
+    }
+  }
 #else
 #error "Unsupported architecture"
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 0
-#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 0
-#define QUICK_STACK_ARG_SKIP 0
 #endif
 
-  static mirror::ArtMethod* GetCallingMethod(mirror::ArtMethod** sp) {
-    byte* previous_sp = reinterpret_cast<byte*>(sp) +
-        QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE;
+ public:
+  static mirror::ArtMethod* GetCallingMethod(mirror::ArtMethod** sp)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK((*sp)->IsCalleeSaveMethod());
+    byte* previous_sp = reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize;
     return *reinterpret_cast<mirror::ArtMethod**>(previous_sp);
   }
 
-  static uintptr_t GetCallingPc(mirror::ArtMethod** sp) {
-    byte* lr = reinterpret_cast<byte*>(sp) + QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET;
+  // For the given quick ref and args quick frame, return the caller's PC.
+  static uintptr_t GetCallingPc(mirror::ArtMethod** sp)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK((*sp)->IsCalleeSaveMethod());
+    byte* lr = reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_LrOffset;
     return *reinterpret_cast<uintptr_t*>(lr);
   }
 
   QuickArgumentVisitor(mirror::ArtMethod** sp, bool is_static,
                        const char* shorty, uint32_t shorty_len)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
-    is_static_(is_static), shorty_(shorty), shorty_len_(shorty_len),
-    args_in_regs_(ComputeArgsInRegs(is_static, shorty, shorty_len)),
-    num_params_((is_static ? 0 : 1) + shorty_len - 1),  // +1 for this, -1 for return type
-    reg_args_(reinterpret_cast<byte*>(sp) + QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET),
-    stack_args_(reinterpret_cast<byte*>(sp) + QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE
-                + QUICK_STACK_ARG_SKIP),
-    cur_args_(reg_args_),
-    cur_arg_index_(0),
-    param_index_(0),
-    is_split_long_or_double_(false) {
-    DCHECK_EQ(static_cast<size_t>(QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE),
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
+      is_static_(is_static), shorty_(shorty), shorty_len_(shorty_len),
+      gpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset),
+      fpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset),
+      stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
+                  + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
+      gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
+      is_split_long_or_double_(false) {
+    DCHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize,
               Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
   }
 
@@ -143,30 +208,38 @@
 
   virtual void Visit() = 0;
 
-  Primitive::Type GetParamPrimitiveType() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t index = param_index_;
-    if (is_static_) {
-      index++;  // 0th argument must skip return value at start of the shorty
-    } else if (index == 0) {
-      return Primitive::kPrimNot;
-    }
-    CHECK_LT(index, shorty_len_);
-    return Primitive::GetType(shorty_[index]);
+  Primitive::Type GetParamPrimitiveType() const {
+    return cur_type_;
   }
 
   byte* GetParamAddress() const {
-    return cur_args_ + (cur_arg_index_ * kPointerSize);
+    if (!kSoftFloatAbi) {
+      Primitive::Type type = GetParamPrimitiveType();
+      if (UNLIKELY((type == Primitive::kPrimDouble) || (type == Primitive::kPrimFloat))) {
+        if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) {
+          return fpr_args_ + (fpr_index_ * kBytesPerFprSpillLocation);
+        }
+      }
+    }
+    if (gpr_index_ < kNumGprArgs) {
+      return gpr_args_ + GprIndexToGprOffset(gpr_index_);
+    }
+    return stack_args_ + (stack_index_ * kBytesStackArgLocation);
   }
 
   bool IsSplitLongOrDouble() const {
-    return is_split_long_or_double_;
+    if ((kBytesPerGprSpillLocation == 4) || (kBytesPerFprSpillLocation == 4)) {
+      return is_split_long_or_double_;
+    } else {
+      return false;  // An optimization for when GPR and FPRs are 64bit.
+    }
   }
 
-  bool IsParamAReference() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsParamAReference() const {
     return GetParamPrimitiveType() == Primitive::kPrimNot;
   }
 
-  bool IsParamALongOrDouble() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsParamALongOrDouble() const {
     Primitive::Type type = GetParamPrimitiveType();
     return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
   }
@@ -179,51 +252,179 @@
   }
 
   void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (cur_arg_index_ = 0;  cur_arg_index_ < args_in_regs_ && param_index_ < num_params_; ) {
-      is_split_long_or_double_ = (cur_arg_index_ == 2) && IsParamALongOrDouble();
+    gpr_index_ = 0;
+    fpr_index_ = 0;
+    stack_index_ = 0;
+    if (!is_static_) {  // Handle the implicit receiver ("this") argument.
+      cur_type_ = Primitive::kPrimNot;
+      is_split_long_or_double_ = false;
       Visit();
-      cur_arg_index_ += (IsParamALongOrDouble() ? 2 : 1);
-      param_index_++;
+      if (kNumGprArgs > 0) {
+        gpr_index_++;
+      } else {
+        stack_index_++;
+      }
     }
-    cur_args_ = stack_args_;
-    cur_arg_index_ = is_split_long_or_double_ ? 1 : 0;
-    is_split_long_or_double_ = false;
-    while (param_index_ < num_params_) {
-      Visit();
-      cur_arg_index_ += (IsParamALongOrDouble() ? 2 : 1);
-      param_index_++;
+    for (uint32_t shorty_index = 1; shorty_index < shorty_len_; ++shorty_index) {
+      cur_type_ = Primitive::GetType(shorty_[shorty_index]);
+      switch (cur_type_) {
+        case Primitive::kPrimNot:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          is_split_long_or_double_ = false;
+          Visit();
+          if (gpr_index_ < kNumGprArgs) {
+            gpr_index_++;
+          } else {
+            stack_index_++;
+          }
+          break;
+        case Primitive::kPrimFloat:
+          is_split_long_or_double_ = false;
+          Visit();
+          if (kSoftFloatAbi) {
+            if (gpr_index_ < kNumGprArgs) {
+              gpr_index_++;
+            } else {
+              stack_index_++;
+            }
+          } else {
+            if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) {
+              fpr_index_++;
+            } else {
+              stack_index_++;
+            }
+          }
+          break;
+        case Primitive::kPrimDouble:
+        case Primitive::kPrimLong:
+          if (kSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) {
+            is_split_long_or_double_ = (kBytesPerGprSpillLocation == 4) &&
+                ((gpr_index_ + 1) == kNumGprArgs);
+            Visit();
+            if (gpr_index_ < kNumGprArgs) {
+              gpr_index_++;
+              if (kBytesPerGprSpillLocation == 4) {
+                if (gpr_index_ < kNumGprArgs) {
+                  gpr_index_++;
+                } else {
+                  stack_index_++;
+                }
+              }
+            } else {
+              if (kBytesStackArgLocation == 4) {
+                stack_index_+= 2;
+              } else {
+                CHECK_EQ(kBytesStackArgLocation, 8U);
+                stack_index_++;
+              }
+            }
+          } else {
+            is_split_long_or_double_ = (kBytesPerFprSpillLocation == 4) &&
+                ((fpr_index_ + 1) == kNumFprArgs);
+            Visit();
+            if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) {
+              fpr_index_++;
+              if (kBytesPerFprSpillLocation == 4) {
+                if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) {
+                  fpr_index_++;
+                } else {
+                  stack_index_++;
+                }
+              }
+            } else {
+              if (kBytesStackArgLocation == 4) {
+                stack_index_+= 2;
+              } else {
+                CHECK_EQ(kBytesStackArgLocation, 8U);
+                stack_index_++;
+              }
+            }
+          }
+          break;
+        default:
+          LOG(FATAL) << "Unexpected type: " << cur_type_ << " in " << shorty_;
+      }
     }
   }
 
  private:
-  static size_t ComputeArgsInRegs(bool is_static, const char* shorty, uint32_t shorty_len)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t args_in_regs = (is_static ? 0 : 1);
-    for (size_t i = 0; i < shorty_len; i++) {
-      char s = shorty[i];
-      if (s == 'J' || s == 'D') {
-        args_in_regs += 2;
-      } else {
-        args_in_regs++;
+  static size_t StackArgumentStartFromShorty(bool is_static, const char* shorty,
+                                             uint32_t shorty_len) {
+    if (kSoftFloatAbi) {
+      CHECK_EQ(kNumFprArgs, 0U);
+      return (kNumGprArgs * kBytesPerGprSpillLocation) + kBytesPerGprSpillLocation /* ArtMethod* */;
+    } else {
+      size_t offset = kBytesPerGprSpillLocation;  // Skip Method*.
+      size_t gprs_seen = 0;
+      size_t fprs_seen = 0;
+      if (!is_static && (gprs_seen < kNumGprArgs)) {
+        gprs_seen++;
+        offset += kBytesStackArgLocation;
       }
-      if (args_in_regs > 3) {
-        args_in_regs = 3;
-        break;
+      for (uint32_t i = 1; i < shorty_len; ++i) {
+        switch (shorty[i]) {
+          case 'Z':
+          case 'B':
+          case 'C':
+          case 'S':
+          case 'I':
+          case 'L':
+            if (gprs_seen < kNumGprArgs) {
+              gprs_seen++;
+              offset += kBytesStackArgLocation;
+            }
+            break;
+          case 'J':
+            if (gprs_seen < kNumGprArgs) {
+              gprs_seen++;
+              offset += 2 * kBytesStackArgLocation;
+              if (kBytesPerGprSpillLocation == 4) {
+                if (gprs_seen < kNumGprArgs) {
+                  gprs_seen++;
+                }
+              }
+            }
+            break;
+          case 'F':
+            if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) {
+              fprs_seen++;
+              offset += kBytesStackArgLocation;
+            }
+            break;
+          case 'D':
+            if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) {
+              fprs_seen++;
+              offset += 2 * kBytesStackArgLocation;
+              if (kBytesPerFprSpillLocation == 4) {
+                if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) {
+                  fprs_seen++;
+                }
+              }
+            }
+            break;
+          default:
+            LOG(FATAL) << "Unexpected shorty character: " << shorty[i] << " in " << shorty;
+        }
       }
+      return offset;
     }
-    return args_in_regs;
   }
 
   const bool is_static_;
   const char* const shorty_;
   const uint32_t shorty_len_;
-  const size_t args_in_regs_;
-  const size_t num_params_;
-  byte* const reg_args_;
-  byte* const stack_args_;
-  byte* cur_args_;
-  size_t cur_arg_index_;
-  size_t param_index_;
+  byte* const gpr_args_;  // Address of GPR arguments in callee save frame.
+  byte* const fpr_args_;  // Address of FPR arguments in callee save frame.
+  byte* const stack_args_;  // Address of stack arguments in caller's frame.
+  uint32_t gpr_index_;  // Index into spilled GPRs.
+  uint32_t fpr_index_;  // Index into spilled FPRs.
+  uint32_t stack_index_;  // Index into arguments on the stack.
+  // The current type of argument during VisitArguments.
+  Primitive::Type cur_type_;
   // Does a 64bit parameter straddle the register and stack arguments?
   bool is_split_long_or_double_;
 };
@@ -231,9 +432,8 @@
 // Visits arguments on the stack placing them into the shadow frame.
 class BuildQuickShadowFrameVisitor : public QuickArgumentVisitor {
  public:
-  BuildQuickShadowFrameVisitor(mirror::ArtMethod** sp,
-      bool is_static, const char* shorty,
-       uint32_t shorty_len, ShadowFrame& sf, size_t first_arg_reg) :
+  BuildQuickShadowFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
+                               uint32_t shorty_len, ShadowFrame* sf, size_t first_arg_reg) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sf_(sf), cur_reg_(first_arg_reg) {}
 
   virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -242,14 +442,14 @@
       case Primitive::kPrimLong:  // Fall-through.
       case Primitive::kPrimDouble:
         if (IsSplitLongOrDouble()) {
-          sf_.SetVRegLong(cur_reg_, ReadSplitLongParam());
+          sf_->SetVRegLong(cur_reg_, ReadSplitLongParam());
         } else {
-          sf_.SetVRegLong(cur_reg_, *reinterpret_cast<jlong*>(GetParamAddress()));
+          sf_->SetVRegLong(cur_reg_, *reinterpret_cast<jlong*>(GetParamAddress()));
         }
         ++cur_reg_;
         break;
       case Primitive::kPrimNot:
-        sf_.SetVRegReference(cur_reg_, *reinterpret_cast<mirror::Object**>(GetParamAddress()));
+        sf_->SetVRegReference(cur_reg_, *reinterpret_cast<mirror::Object**>(GetParamAddress()));
         break;
       case Primitive::kPrimBoolean:  // Fall-through.
       case Primitive::kPrimByte:     // Fall-through.
@@ -257,7 +457,7 @@
       case Primitive::kPrimShort:    // Fall-through.
       case Primitive::kPrimInt:      // Fall-through.
       case Primitive::kPrimFloat:
-        sf_.SetVReg(cur_reg_, *reinterpret_cast<jint*>(GetParamAddress()));
+        sf_->SetVReg(cur_reg_, *reinterpret_cast<jint*>(GetParamAddress()));
         break;
       case Primitive::kPrimVoid:
         LOG(FATAL) << "UNREACHABLE";
@@ -267,8 +467,8 @@
   }
 
  private:
-  ShadowFrame& sf_;
-  size_t cur_reg_;
+  ShadowFrame* const sf_;
+  uint32_t cur_reg_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildQuickShadowFrameVisitor);
 };
@@ -293,8 +493,8 @@
                                                   method, 0, memory));
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, mh.IsStatic(), mh.GetShorty(),
-                                                 mh.GetShortyLength(),
-                                                 *shadow_frame, first_arg_reg);
+                                                      mh.GetShortyLength(),
+                                                      shadow_frame, first_arg_reg);
     shadow_frame_builder.VisitArguments();
     // Push a transition back into managed code onto the linked list in thread.
     ManagedStack fragment;
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 362df8c..a0665b5 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2509,8 +2509,7 @@
       JniAbortF("NewDirectByteBuffer", "non-zero capacity for nullptr pointer: %" PRId64, capacity);
     }
 
-    // At the moment, the Java side is limited to 32 bits.
-    CHECK_LE(reinterpret_cast<uintptr_t>(address), 0xffffffff);
+    // At the moment, the capacity is limited to 32 bits.
     CHECK_LE(capacity, 0xffffffff);
     jlong address_arg = reinterpret_cast<jlong>(address);
     jint capacity_arg = static_cast<jint>(capacity);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 4c58c84..2dd7d96 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -127,7 +127,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "nop", "()V");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("V", 1);
     JValue result;
 
     if (!is_static) {
@@ -143,7 +143,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(B)B");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("BB", 2);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -179,7 +179,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(I)I");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("II", 2);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -215,7 +215,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(D)D");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("DD", 2);
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue result;
@@ -259,7 +259,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(II)I");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("III", 3);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -305,7 +305,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(III)I");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("IIII", 4);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -361,7 +361,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(IIII)I");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("IIIII", 5);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -422,7 +422,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(IIIII)I");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("IIIIII", 6);
     uint32_t* args = arg_array.GetArray();
     JValue result;
 
@@ -488,7 +488,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(DD)D");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("DDD", 3);
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue value2;
@@ -559,7 +559,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDD)D");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("DDDD", 4);
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue value2;
@@ -617,7 +617,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDDD)D");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("DDDDD", 5);
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue value2;
@@ -684,7 +684,7 @@
     mirror::Object* receiver;
     JniInternalTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDDDD)D");
 
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array("DDDDDD", 6);
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue value2;
@@ -1784,7 +1784,7 @@
   mirror::ArtMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V");
   ASSERT_TRUE(method != NULL);
 
-  ArgArray arg_array(NULL, 0);
+  ArgArray arg_array("VL", 2);
   arg_array.Append(0U);
   JValue result;
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index d5f7597..20d2b18 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -37,6 +37,10 @@
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
 extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                       const char*);
+#ifdef __x86_64__
+extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
+                                             const char*);
+#endif
 
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
@@ -276,7 +280,15 @@
                                                   : GetEntryPointFromPortableCompiledCode());
       }
       if (!IsPortableCompiled()) {
+#ifdef __x86_64__
+        if (!IsStatic()) {
+          (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
+        } else {
+          (*art_quick_invoke_static_stub)(this, args, args_size, self, result, shorty);
+        }
+#else
         (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
+#endif
       } else {
         (*art_portable_invoke_stub)(this, args, args_size, self, result, shorty[0]);
       }
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 71cc7af..86f5348 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -313,9 +313,9 @@
   void SetOatNativeGcMapOffset(uint32_t gc_map_offset);
   uint32_t GetOatNativeGcMapOffset();
 
-  size_t GetFrameSizeInBytes() {
-    DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-    size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_), false);
+  uint32_t GetFrameSizeInBytes() {
+    uint32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_),
+                                 false);
     DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
     return result;
   }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e66e5af..3ccea36 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1486,12 +1486,18 @@
         (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
         (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-                         (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+                         (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save
+    uint32_t fp_arg_spills =
+        (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
+        (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+        (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+    uint32_t fp_spills = (type == kRefsAndArgs ? fp_arg_spills : 0);
     size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+                                 __builtin_popcount(fp_spills) /* fprs */ +
                                  1 /* Method* */) * kPointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
-    method->SetFpSpillMask(0);
+    method->SetFpSpillMask(fp_spills);
   } else {
     UNIMPLEMENTED(FATAL) << instruction_set;
   }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 159de2e..223b8d5 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -429,6 +429,10 @@
     return callee_save_methods_[type];
   }
 
+  static size_t GetCalleeSaveMethodOffset(CalleeSaveType type) {
+    return OFFSETOF_MEMBER(Runtime, callee_save_methods_[type]);
+  }
+
   void SetCalleeSaveMethod(mirror::ArtMethod* method, CalleeSaveType type);
 
   mirror::ArtMethod* CreateCalleeSaveMethod(InstructionSet instruction_set,