Remove use of compiled invoke stubs from portable.

Now the invoke stubs can be safely removed. Tested and working on ARM;
only basic testing was done on x86 and MIPS, because portable is
currently broken on those architectures even without this change.

Change-Id: Ib73f2b7aa9d81f5f0e1e817d16b9bec464c5a5aa
diff --git a/src/class_linker.cc b/src/class_linker.cc
index 39fefcb..b5e4020 100644
--- a/src/class_linker.cc
+++ b/src/class_linker.cc
@@ -2707,10 +2707,9 @@
   if (clinit != NULL) {
     if (Runtime::Current()->IsStarted()) {
       JValue result;
-      JValue float_result;
-      clinit->Invoke(self, NULL, 0, &result, &float_result);
+      clinit->Invoke(self, NULL, 0, &result, 'V');
     } else {
-      art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL, NULL);
+      art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL);
     }
   }
 
diff --git a/src/debugger.cc b/src/debugger.cc
index a2ebddf..080288f 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -2729,12 +2729,7 @@
   MethodHelper mh(m);
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, pReq->receiver_, reinterpret_cast<jvalue*>(pReq->arg_values_));
-  JValue unused_result;
-  if (mh.IsReturnFloatOrDouble()) {
-    InvokeWithArgArray(soa, m, &arg_array, &unused_result, &pReq->result_value);
-  } else {
-    InvokeWithArgArray(soa, m, &arg_array, &pReq->result_value, &unused_result);
-  }
+  InvokeWithArgArray(soa, m, &arg_array, &pReq->result_value, mh.GetShorty()[0]);
 
   pReq->exception = gRegistry->Add(soa.Self()->GetException());
   pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
diff --git a/src/heap.cc b/src/heap.cc
index 718226d..2f7cb24 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -1729,11 +1729,10 @@
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
-  JValue float_result;
   ArgArray arg_array(NULL, 0);
   arg_array.Append(reinterpret_cast<uint32_t>(object));
   soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
-      arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
+      arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
 }
 
 size_t Heap::GetBytesAllocated() const {
@@ -1767,11 +1766,10 @@
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(Thread::Current());
       JValue result;
-      JValue float_result;
       ArgArray arg_array(NULL, 0);
       arg_array.Append(reinterpret_cast<uint32_t>(*cleared));
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
-          arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
+          arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
     *cleared = NULL;
   }
diff --git a/src/interpreter/interpreter.cc b/src/interpreter/interpreter.cc
index 29781bc..91b381c 100644
--- a/src/interpreter/interpreter.cc
+++ b/src/interpreter/interpreter.cc
@@ -74,7 +74,7 @@
     CHECK(c != NULL);
     Object* obj = klass->AllocObject(self);
     CHECK(obj != NULL);
-    EnterInterpreterFromInvoke(self, c, obj, NULL, NULL, NULL);
+    EnterInterpreterFromInvoke(self, c, obj, NULL, NULL);
     result->SetL(obj);
   } else if (name == "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") {
     // Special managed code cut-out to allow field lookup in a un-started runtime that'd fail
@@ -136,7 +136,7 @@
     }
   } else {
     // Not special, continue with regular interpreter execution.
-    EnterInterpreterFromInvoke(self, target_method, receiver, args, result, result);
+    EnterInterpreterFromInvoke(self, target_method, receiver, args, result);
   }
 }
 
@@ -414,14 +414,8 @@
     arg_array.BuildArgArray(shadow_frame, receiver, dec_insn.arg + (type != kStatic ? 1 : 0));
   }
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    JValue unused_result;
-    if (mh.IsReturnFloatOrDouble()) {
-      target_method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(),
-                            &unused_result, result);
-    } else {
-      target_method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(),
-                            result, &unused_result);
-    }
+    target_method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
+                          mh.GetShorty()[0]);
   } else {
     uint32_t* args = arg_array.GetArray();
     if (type != kStatic) {
@@ -1807,7 +1801,7 @@
 }
 
 void EnterInterpreterFromInvoke(Thread* self, AbstractMethod* method, Object* receiver,
-                                uint32_t* args, JValue* result, JValue* float_result) {
+                                uint32_t* args, JValue* result) {
   DCHECK_EQ(self, Thread::Current());
   if (__builtin_frame_address(0) < self->GetStackEnd()) {
     ThrowStackOverflowError(self);
@@ -1875,28 +1869,16 @@
   }
   if (LIKELY(!method->IsNative())) {
     JValue r = Execute(self, mh, code_item, *shadow_frame.get(), JValue());
-    if (result != NULL && float_result != NULL) {
-      if (mh.IsReturnFloatOrDouble()) {
-        *float_result = r;
-      } else {
-        *result = r;
-      }
+    if (result != NULL) {
+      *result = r;
     }
   } else {
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     if (!Runtime::Current()->IsStarted()) {
-      if (mh.IsReturnFloatOrDouble()) {
-        UnstartedRuntimeJni(self, method, receiver, args, float_result);
-      } else {
-        UnstartedRuntimeJni(self, method, receiver, args, result);
-      }
+      UnstartedRuntimeJni(self, method, receiver, args, result);
     } else {
-      if (mh.IsReturnFloatOrDouble()) {
-        InterpreterJni(self, method, shorty, receiver, args, float_result);
-      } else {
-        InterpreterJni(self, method, shorty, receiver, args, result);
-      }
+      InterpreterJni(self, method, shorty, receiver, args, result);
     }
   }
   self->PopShadowFrame();
diff --git a/src/interpreter/interpreter.h b/src/interpreter/interpreter.h
index 91816c9..556b044 100644
--- a/src/interpreter/interpreter.h
+++ b/src/interpreter/interpreter.h
@@ -34,8 +34,7 @@
 namespace interpreter {
 
 extern void EnterInterpreterFromInvoke(Thread* self, mirror::AbstractMethod* method,
-                                       mirror::Object* receiver, uint32_t* args,
-                                       JValue* result, JValue* float_result)
+                                       mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern JValue EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame& shadow_frame,
diff --git a/src/invoke_arg_array_builder.h b/src/invoke_arg_array_builder.h
index b1af6d5..e251e33 100644
--- a/src/invoke_arg_array_builder.h
+++ b/src/invoke_arg_array_builder.h
@@ -67,6 +67,12 @@
   }
 
   void AppendWide(uint64_t value) {
+    // For ARM and MIPS portable, align wide values to 8 bytes (ArgArray starts at offset of 4).
+#if defined(ART_USE_PORTABLE_COMPILER) && (TARGET_ARCH == arm || TARGET_ARCH == mips)
+    if (num_bytes_ % 8 == 0) {
+      num_bytes_ += 4;
+    }
+#endif
     arg_array_[num_bytes_ / 4] = value;
     arg_array_[(num_bytes_ / 4) + 1] = value >> 32;
     num_bytes_ += 8;
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 8729ed7..d571bf3 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -135,12 +135,12 @@
 }
 
 void InvokeWithArgArray(const ScopedObjectAccess& soa, AbstractMethod* method,
-                        ArgArray* arg_array, JValue* result, JValue* float_result)
+                        ArgArray* arg_array, JValue* result, char result_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(soa.Env()->check_jni)) {
     CheckMethodArguments(method, arg_array->GetArray());
   }
-  method->Invoke(soa.Self(), arg_array->GetArray(), arg_array->GetNumBytes(), result, float_result);
+  method->Invoke(soa.Self(), arg_array->GetArray(), arg_array->GetNumBytes(), result, result_type);
 }
 
 static JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj,
@@ -150,15 +150,10 @@
   Object* receiver = method->IsStatic() ? NULL : soa.Decode<Object*>(obj);
   MethodHelper mh(method);
   JValue result;
-  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
-  if (mh.IsReturnFloatOrDouble()) {
-    return float_result;
-  } else {
-    return result;
-  }
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  return result;
 }
 
 static AbstractMethod* FindVirtualMethod(Object* receiver, AbstractMethod* method)
@@ -173,15 +168,10 @@
   AbstractMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
   JValue result;
-  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
-  if (mh.IsReturnFloatOrDouble()) {
-    return float_result;
-  } else {
-    return result;
-  }
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  return result;
 }
 
 static JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
@@ -191,15 +181,10 @@
   AbstractMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
   JValue result;
-  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
-  if (mh.IsReturnFloatOrDouble()) {
-    return float_result;
-  } else {
-    return result;
-  }
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  return result;
 }
 
 // Section 12.3.2 of the JNI spec describes JNI class descriptors. They're
@@ -597,15 +582,10 @@
   Object* receiver = method->IsStatic() ? NULL : soa.Decode<Object*>(obj);
   MethodHelper mh(method);
   JValue result;
-  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
-  if (mh.IsReturnFloatOrDouble()) {
-    return float_result;
-  } else {
-    return result;
-  }
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  return result;
 }
 
 class JNI {
diff --git a/src/jni_internal.h b/src/jni_internal.h
index d4fc514..9c067de 100644
--- a/src/jni_internal.h
+++ b/src/jni_internal.h
@@ -57,7 +57,7 @@
 JValue InvokeWithJValues(const ScopedObjectAccess&, jobject obj, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 void InvokeWithArgArray(const ScopedObjectAccess& soa, mirror::AbstractMethod* method,
-                        ArgArray *arg_array, JValue* result, JValue* float_result)
+                        ArgArray *arg_array, JValue* result, char result_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index 5ef2281..8f0d09b 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -31,7 +31,7 @@
 namespace art {
 
 extern "C" void art_quick_invoke_stub(const mirror::AbstractMethod*, uint32_t*, uint32_t,
-                                      Thread*, JValue*, JValue*);
+                                      Thread*, JValue*, char);
 
 class JniInternalTest : public CommonTest {
  protected:
@@ -114,12 +114,11 @@
 
     ArgArray arg_array(NULL, 0);
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
     }
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'V');
   }
 
   void InvokeIdentityByteMethod(bool is_static)
@@ -131,7 +130,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -140,22 +138,22 @@
 
     arg_array.Append(0);
     result.SetB(-1);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'B');
     EXPECT_EQ(0, result.GetB());
 
     args[0] = -1;
     result.SetB(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'B');
     EXPECT_EQ(-1, result.GetB());
 
     args[0] = SCHAR_MAX;
     result.SetB(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'B');
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     args[0] = (SCHAR_MIN << 24) >> 24;
     result.SetB(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'B');
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -168,7 +166,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -177,22 +174,22 @@
 
     arg_array.Append(0);
     result.SetI(-1);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
 
     args[0] = -1;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0] = INT_MIN;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -206,7 +203,6 @@
     uint32_t* args = arg_array.GetArray();
     JValue value;
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -216,29 +212,29 @@
     value.SetD(0.0);
     arg_array.AppendWide(value.GetJ());
     result.SetD(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(0.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(-1.0);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(-1.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(DBL_MAX, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(DBL_MAX, result.GetD());
 
     value.SetD(DBL_MIN);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(DBL_MIN, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(DBL_MIN, result.GetD());
   }
 
   void InvokeSumIntIntMethod(bool is_static)
@@ -250,7 +246,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -260,31 +255,31 @@
     arg_array.Append(0);
     arg_array.Append(0);
     result.SetI(-1);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(3, result.GetI());
 
     args[0] = -2;
     args[1] = 5;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(3, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     result.SetI(1234);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     result.SetI(INT_MIN);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -297,7 +292,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -308,35 +302,35 @@
     arg_array.Append(0);
     arg_array.Append(0);
     result.SetI(-1);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     args[2] = 3;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(6, result.GetI());
 
     args[0] = -1;
     args[1] = 2;
     args[2] = -3;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     args[2] = INT_MAX;
     result.SetI(1234);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     args[2] = INT_MAX;
     result.SetI(INT_MIN);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -349,7 +343,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -361,7 +354,7 @@
     arg_array.Append(0);
     arg_array.Append(0);
     result.SetI(-1);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -369,7 +362,7 @@
     args[2] = 3;
     args[3] = 4;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(10, result.GetI());
 
     args[0] = -1;
@@ -377,7 +370,7 @@
     args[2] = -3;
     args[3] = 4;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(2, result.GetI());
 
     args[0] = INT_MAX;
@@ -385,7 +378,7 @@
     args[2] = INT_MAX;
     args[3] = INT_MIN;
     result.SetI(1234);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
@@ -393,7 +386,7 @@
     args[2] = INT_MAX;
     args[3] = INT_MAX;
     result.SetI(INT_MIN);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -406,7 +399,6 @@
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -419,7 +411,7 @@
     arg_array.Append(0);
     arg_array.Append(0);
     result.SetI(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -428,7 +420,7 @@
     args[3] = 4;
     args[4] = 5;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(15, result.GetI());
 
     args[0] = -1;
@@ -437,7 +429,7 @@
     args[3] = 4;
     args[4] = -5;
     result.SetI(0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(-3, result.GetI());
 
     args[0] = INT_MAX;
@@ -446,7 +438,7 @@
     args[3] = INT_MIN;
     args[4] = INT_MAX;
     result.SetI(1234);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0] = INT_MAX;
@@ -455,7 +447,7 @@
     args[3] = INT_MAX;
     args[4] = INT_MAX;
     result.SetI(INT_MIN);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'I');
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -470,7 +462,6 @@
     JValue value;
     JValue value2;
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -482,8 +473,8 @@
     arg_array.AppendWide(value.GetJ());
     arg_array.AppendWide(value2.GetJ());
     result.SetD(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(0.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(2.0);
@@ -492,8 +483,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(3.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(3.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(-2.0);
@@ -502,8 +493,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(-1.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
     value2.SetD(DBL_MIN);
@@ -512,8 +503,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(1.7976931348623157e308, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(1.7976931348623157e308, result.GetD());
 
     value.SetD(DBL_MAX);
     value2.SetD(DBL_MAX);
@@ -522,8 +513,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(INFINITY, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(INFINITY, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleMethod(bool is_static)
@@ -538,7 +529,6 @@
     JValue value2;
     JValue value3;
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -552,8 +542,8 @@
     arg_array.AppendWide(value2.GetJ());
     arg_array.AppendWide(value3.GetJ());
     result.SetD(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(0.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(2.0);
@@ -565,8 +555,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(6.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(6.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(-2.0);
@@ -578,8 +568,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(2.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(2.0, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static)
@@ -595,7 +585,6 @@
     JValue value3;
     JValue value4;
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -611,8 +600,8 @@
     arg_array.AppendWide(value3.GetJ());
     arg_array.AppendWide(value4.GetJ());
     result.SetD(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(0.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(2.0);
@@ -627,8 +616,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(10.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(10.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(-2.0);
@@ -643,8 +632,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(-2.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(-2.0, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static)
@@ -661,7 +650,6 @@
     JValue value4;
     JValue value5;
     JValue result;
-    JValue float_result;
 
     if (!is_static) {
       arg_array.Append(reinterpret_cast<uint32_t>(receiver));
@@ -679,8 +667,8 @@
     arg_array.AppendWide(value4.GetJ());
     arg_array.AppendWide(value5.GetJ());
     result.SetD(-1.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(0.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(2.0);
@@ -698,8 +686,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(15.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(15.0, result.GetD());
 
     value.SetD(1.0);
     value2.SetD(-2.0);
@@ -717,8 +705,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
-    EXPECT_EQ(3.0, float_result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'D');
+    EXPECT_EQ(3.0, result.GetD());
   }
 
   JavaVMExt* vm_;
@@ -1602,9 +1590,8 @@
   ArgArray arg_array(NULL, 0);
   arg_array.Append(0);
   JValue result;
-  JValue float_result;
 
-  (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+  (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, 'V');
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
diff --git a/src/mirror/abstract_method.cc b/src/mirror/abstract_method.cc
index c0c9a55..2e56004 100644
--- a/src/mirror/abstract_method.cc
+++ b/src/mirror/abstract_method.cc
@@ -32,8 +32,8 @@
 namespace art {
 namespace mirror {
 
-extern "C" void art_quick_invoke_stub(AbstractMethod*, uint32_t*, uint32_t,
-                                      Thread*, JValue*, JValue*);
+extern "C" void art_portable_invoke_stub(AbstractMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
+extern "C" void art_quick_invoke_stub(AbstractMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
 
 // TODO: get global references for these
 Class* AbstractMethod::java_lang_reflect_Constructor_ = NULL;
@@ -260,7 +260,7 @@
 }
 
 void AbstractMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
-                            JValue* float_result) {
+                            char result_type) {
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
@@ -278,7 +278,6 @@
     LOG(INFO) << "Not invoking " << PrettyMethod(this) << " for a runtime that isn't started";
     if (result != NULL) {
       result->SetJ(0);
-      float_result->SetJ(0);
     }
   } else {
     bool interpret = self->ReadFlag(kEnterInterpreter) && !IsNative() && !IsProxyMethod();
@@ -289,34 +288,10 @@
           LOG(INFO) << StringPrintf("Invoking '%s' code=%p stub=%p",
                                     PrettyMethod(this).c_str(), GetCode(), stub);
         }
-        // TODO: Temporary to keep portable working while stubs are removed from quick.
 #ifdef ART_USE_PORTABLE_COMPILER
-        MethodHelper mh(this);
-        const char* shorty = mh.GetShorty();
-        uint32_t shorty_len = mh.GetShortyLength();
-        UniquePtr<JValue[]> jvalue_args(new JValue[shorty_len - 1]);
-        Object* receiver = NULL;
-        uint32_t* ptr = args;
-        if (!this->IsStatic()) {
-          receiver = reinterpret_cast<Object*>(*ptr);
-          ptr++;
-        }
-        for (uint32_t i = 1; i < shorty_len; i++) {
-          if ((shorty[i] == 'J') || (shorty[i] == 'D')) {
-            jvalue_args[i - 1].SetJ(*((uint64_t*)ptr));
-            ptr++;
-          } else {
-            jvalue_args[i - 1].SetI(*ptr);
-          }
-          ptr++;
-        }
-        if (mh.IsReturnFloatOrDouble()) {
-          (*stub)(this, receiver, self, jvalue_args.get(), float_result);
-        } else {
-          (*stub)(this, receiver, self, jvalue_args.get(), result);
-        }
+        (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
 #else
-        (*art_quick_invoke_stub)(this, args, args_size, self, result, float_result);
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
 #endif
         if (UNLIKELY(reinterpret_cast<int32_t>(self->GetException()) == -1)) {
           // Unusual case where we were running LLVM generated code and an
@@ -337,12 +312,10 @@
           LOG(INFO) << "Interpreting " << PrettyMethod(this) << "'";
         }
         if (this->IsStatic()) {
-          art::interpreter::EnterInterpreterFromInvoke(self, this, NULL, args,
-                                                       result, float_result);
+          art::interpreter::EnterInterpreterFromInvoke(self, this, NULL, args, result);
         } else {
           Object* receiver = reinterpret_cast<Object*>(args[0]);
-          art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1,
-                                                       result, float_result);
+          art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1, result);
         }
         if (kLogInvocationStartAndReturn) {
           LOG(INFO) << "Returned '" << PrettyMethod(this) << "'";
@@ -354,7 +327,6 @@
           << " stub=" << reinterpret_cast<void*>(stub);
       if (result != NULL) {
         result->SetJ(0);
-        float_result->SetJ(0);
       }
     }
   }
diff --git a/src/mirror/abstract_method.h b/src/mirror/abstract_method.h
index a489b1d..b91885a 100644
--- a/src/mirror/abstract_method.h
+++ b/src/mirror/abstract_method.h
@@ -193,8 +193,7 @@
   // Find the method that this method overrides
   AbstractMethod* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
-              JValue* float_result)
+  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, char result_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const void* GetCode() const {
diff --git a/src/oat/runtime/arm/runtime_support_arm.S b/src/oat/runtime/arm/runtime_support_arm.S
index bd3f45d..a2ffac7 100644
--- a/src/oat/runtime/arm/runtime_support_arm.S
+++ b/src/oat/runtime/arm/runtime_support_arm.S
@@ -247,14 +247,61 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Invocation stub.
+     * Portable invocation stub.
      * On entry:
      *   r0 = method pointer
      *   r1 = argument array or NULL for no argument methods
      *   r2 = size of argument array in bytes
      *   r3 = (managed) thread pointer
-     *   [sp] = JValue* result for non-floating point returns
-     *   [sp + 4] = JValue* result for floating point returns
+     *   [sp] = JValue* result
+     *   [sp + 4] = result type char
+     */
+ENTRY art_portable_invoke_stub
+    push   {r0, r4, r5, r9, r11, lr}       @ spill regs
+    .save  {r0, r4, r5, r9, r11, lr}
+    .pad #24
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r4, 4
+    .cfi_rel_offset r5, 8
+    .cfi_rel_offset r9, 12
+    .cfi_rel_offset r11, 16
+    .cfi_rel_offset lr, 20
+    mov    r11, sp                         @ save the stack pointer
+    .cfi_def_cfa_register r11
+    mov    r9, r3                          @ move managed thread pointer into r9
+    mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
+    add    r5, r2, #16                     @ arg bytes + method pointer slot, plus round-up slack for the mask below
+    and    r5, #0xFFFFFFF0                 @ align frame size to 16 bytes
+    sub    sp, r5                          @ reserve stack space for argument array
+    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
+    bl     memcpy                          @ memcpy (dest, src, bytes)
+    ldr    r0, [r11]                       @ restore method*
+    ldr    r1, [sp, #4]                    @ copy arg value for r1
+    ldr    r2, [sp, #8]                    @ copy arg value for r2
+    ldr    r3, [sp, #12]                   @ copy arg value for r3
+    mov    ip, #0                          @ set ip to 0
+    str    ip, [sp]                        @ store NULL for method* at bottom of frame
+    add    sp, #16                         @ first 4 args are not passed on stack for portable
+    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    blx    ip                              @ call the method
+    mov    sp, r11                         @ restore the stack pointer
+    ldr    ip, [sp, #24]                   @ load the result pointer
+    strd   r0, [ip]                        @ store r0/r1 into result pointer
+    pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
+    .cfi_adjust_cfa_offset -24
+    bx     lr
+END art_portable_invoke_stub
+
+    /*
+     * Quick invocation stub.
+     * On entry:
+     *   r0 = method pointer
+     *   r1 = argument array or NULL for no argument methods
+     *   r2 = size of argument array in bytes
+     *   r3 = (managed) thread pointer
+     *   [sp] = JValue* result
+     *   [sp + 4] = result type char
      */
 ENTRY art_quick_invoke_stub
     push   {r0, r4, r5, r9, r11, lr}       @ spill regs
@@ -272,7 +319,7 @@
     mov    r9, r3                          @ move managed thread pointer into r9
     mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
     add    r5, r2, #16                     @ create space for method pointer in frame
-    and    r5, #0xFFFFFFF8                 @ align frame size to 16 bytes
+    and    r5, #0xFFFFFFF0                 @ align frame size to 16 bytes
     sub    sp, r5                          @ reserve stack space for argument array
     add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
     bl     memcpy                          @ memcpy (dest, src, bytes)
@@ -284,15 +331,14 @@
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
     ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
     blx    ip                              @ call the method
-    add    sp, r5                          @ restore the stack
+    mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
     strd   r0, [ip]                        @ store r0/r1 into result pointer
-    ldr    ip, [sp, #28]                   @ load the floating point result pointer
-    strd   r0, [ip]                        @ store r0/r1 into floating point result pointer
     pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
     .cfi_adjust_cfa_offset -24
     bx     lr
 END art_quick_invoke_stub
+
     /*
      * On entry, r0 and r1 must be preserved, r2 is dex PC
      */
diff --git a/src/oat/runtime/mips/runtime_support_mips.S b/src/oat/runtime/mips/runtime_support_mips.S
index 480e5c8..c8f0809 100644
--- a/src/oat/runtime/mips/runtime_support_mips.S
+++ b/src/oat/runtime/mips/runtime_support_mips.S
@@ -427,15 +427,18 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Invocation stub.
+     * Common invocation stub for portable and quick.
      * On entry:
      *   a0 = method pointer
      *   a1 = argument array or NULL for no argument methods
      *   a2 = size of argument array in bytes
      *   a3 = (managed) thread pointer
-     *   [sp + 16] = JValue* result for non-floating point returns
-     *   [sp + 20] = JValue* result for floating point returns
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = result type char
      */
+     .type art_portable_invoke_stub, %function
+     .global art_portable_invoke_stub
+art_portable_invoke_stub:
 ENTRY art_quick_invoke_stub
     GENERATE_GLOBAL_POINTER
     sw    $a0, 0($sp)           # save out a0
@@ -476,13 +479,20 @@
     addiu $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     lw    $t0, 16($sp)          # get result pointer
+    lw    $t1, 20($sp)          # get result type char
+    li    $t2, 68               # put char 'D' into t2
+    beq   $t1, $t2, 1f          # branch if result type char == 'D'
+    li    $t3, 70               # put char 'F' into t3
+    beq   $t1, $t3, 1f          # branch if result type char == 'F'
     sw    $v0, 0($t0)           # store the result
+    jr    $ra
     sw    $v1, 4($t0)           # store the other half of the result
-    lw    $t0, 20($sp)          # get floating point result pointer
+1:
     s.s   $f0, 0($t0)           # store floating point result
     jr    $ra
     s.s   $f1, 4($t0)           # store other half of floating point result
 END art_quick_invoke_stub
+    .size art_portable_invoke_stub, .-art_portable_invoke_stub
 
     /*
      * Entry point of native methods when JNI bug compatibility is enabled.
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 76af338..f7554a2 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -302,15 +302,64 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Invocation stub.
+     * Portable invocation stub.
      * On entry:
      *   [sp] = return address
      *   [sp + 4] = method pointer
      *   [sp + 8] = argument array or NULL for no argument methods
      *   [sp + 12] = size of argument array in bytes
      *   [sp + 16] = (managed) thread pointer
-     *   [sp + 20] = JValue* result for non-floating point returns
-     *   [sp + 24] = JValue* result for floating point returns
+     *   [sp + 20] = JValue* result
+     *   [sp + 24] = result type char
+     */
+DEFINE_FUNCTION art_portable_invoke_stub
+    PUSH ebp                      // save ebp
+    PUSH ebx                      // save ebx
+    mov %esp, %ebp                // copy value of stack pointer into base pointer
+    .cfi_def_cfa_register ebp
+    mov 20(%ebp), %ebx            // get arg array size
+    addl LITERAL(28), %ebx        // reserve space for return addr, method*, ebx, and ebp in frame
+    andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
+    subl LITERAL(12), %ebx        // remove space for return address, ebx, and ebp
+    subl %ebx, %esp               // reserve stack space for argument array
+    lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
+    pushl 20(%ebp)                // push size of region to memcpy
+    pushl 16(%ebp)                // push arg array as source of memcpy
+    pushl %eax                    // push memcpy destination (the 4(%esp) address computed above)
+    call SYMBOL(memcpy)           // (void*, const void*, size_t)
+    addl LITERAL(12), %esp        // pop arguments to memcpy
+    mov 12(%ebp), %eax            // move method pointer into eax
+    mov %eax, (%esp)              // store method pointer at top of stack (esp is not adjusted)
+    call *METHOD_CODE_OFFSET(%eax) // call the method
+    mov %ebp, %esp                // restore stack pointer
+    POP ebx                       // pop ebx
+    POP ebp                       // pop ebp
+    mov 20(%esp), %ecx            // get result pointer
+    cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
+    je return_double_portable
+    cmpl LITERAL(70), 24(%esp)    // test if result type char == 'F'
+    je return_float_portable
+    mov %eax, (%ecx)              // store the result
+    mov %edx, 4(%ecx)             // store the other half of the result
+    ret
+return_double_portable:
+    fstpl (%ecx)                  // store the floating point result as double
+    ret
+return_float_portable:
+    fstps (%ecx)                  // store the floating point result as float
+    ret
+END_FUNCTION art_portable_invoke_stub
+
+    /*
+     * Quick invocation stub.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = method pointer
+     *   [sp + 8] = argument array or NULL for no argument methods
+     *   [sp + 12] = size of argument array in bytes
+     *   [sp + 16] = (managed) thread pointer
+     *   [sp + 20] = JValue* result
+     *   [sp + 24] = result type char
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     PUSH ebp                      // save ebp
@@ -319,7 +368,7 @@
     .cfi_def_cfa_register ebp
     mov 20(%ebp), %ebx            // get arg array size
     addl LITERAL(28), %ebx        // reserve space for return addr, method*, ebx, and ebp in frame
-    andl LITERAL(0xFFFFFFF8), %ebx    // align frame size to 16 bytes
+    andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
     subl LITERAL(12), %ebx        // remove space for return address, ebx, and ebp
     subl %ebx, %esp               // reserve stack space for argument array
     lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
@@ -338,9 +387,15 @@
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
     mov 20(%esp), %ecx            // get result pointer
+    cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
+    je return_double_quick
+    cmpl LITERAL(70), 24(%esp)    // test if result type char == 'F'
+    je return_float_quick
     mov %eax, (%ecx)              // store the result
     mov %edx, 4(%ecx)             // store the other half of the result
-    mov 24(%esp), %ecx            // get floating point result pointer
+    ret
+return_double_quick:
+return_float_quick:
     movsd %xmm0, (%ecx)           // store the floating point result
     ret
 END_FUNCTION art_quick_invoke_stub
diff --git a/src/object_utils.h b/src/object_utils.h
index 616c65c..8a4bf77 100644
--- a/src/object_utils.h
+++ b/src/object_utils.h
@@ -596,11 +596,6 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
-  bool IsReturnFloatOrDouble() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char ret_shorty = GetShorty()[0];
-    return (ret_shorty == 'F') || (ret_shorty == 'D');
-  }
-
   bool HasSameNameAndSignature(MethodHelper* other)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (GetDexCache() == other->GetDexCache()) {
diff --git a/src/reflection.cc b/src/reflection.cc
index 6b64311..73a8a53 100644
--- a/src/reflection.cc
+++ b/src/reflection.cc
@@ -248,7 +248,6 @@
 
   ArgArray arg_array(NULL, 0);
   JValue result;
-  JValue float_result;
   if (src_class == Primitive::kPrimDouble || src_class == Primitive::kPrimLong) {
     arg_array.AppendWide(value.GetJ());
   } else {
@@ -256,7 +255,7 @@
   }
 
   soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, &float_result);
+                              &result, 'L');
   return result.GetL();
 }
 
diff --git a/src/runtime.cc b/src/runtime.cc
index 1e7b000..f6fdef4 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -630,9 +630,8 @@
   CHECK(getSystemClassLoader != NULL);
 
   JValue result;
-  JValue float_result;
   ArgArray arg_array(NULL, 0);
-  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, &float_result);
+  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, 'L');
   mirror::ClassLoader* class_loader = down_cast<mirror::ClassLoader*>(result.GetL());
   CHECK(class_loader != NULL);
 
diff --git a/src/thread.cc b/src/thread.cc
index a85d22a..28a01bc2 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -164,10 +164,9 @@
     mirror::AbstractMethod* m =
         receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     JValue result;
-    JValue float_result;
     ArgArray arg_array(NULL, 0);
     arg_array.Append(reinterpret_cast<uint32_t>(receiver));
-    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
+    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
   }
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);