Revert "ART: Implement X86 hard float (Quick/JNI/Baseline)"

This also reverts the 3 Mac build fixes. Conflicts in context_x86.* were resolved.

This reverts commits
  3d2c8e74c27efee58e24ec31441124f3f21384b9 ,
  34eda1dd66b92a361797c63d57fa19e83c08a1b4 ,
  f601d1954348b71186fa160a0ae6a1f4f1c5aee6 ,
  bc503348a1da573488503cc2819c9e30807bea31 .

Bug: 19150481
Change-Id: I6650ee30a7d261159380fe2119e14379e4dc9970
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 811d4f5..3815722 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -41,15 +41,22 @@
     }
    protected:
     Mir2Lir* m2l_;
+   private:
     size_t cur_core_reg_;
     size_t cur_fp_reg_;
   };
 
-  class InToRegStorageX86Mapper : public InToRegStorageX86_64Mapper {
+  class InToRegStorageX86Mapper : public InToRegStorageMapper {
    public:
-    explicit InToRegStorageX86Mapper(Mir2Lir* m2l)
-        : InToRegStorageX86_64Mapper(m2l) { }
+    explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
     virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+    }
+   protected:
+    Mir2Lir* m2l_;
+   private:
+    size_t cur_core_reg_;
   };
 
   InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_;
@@ -113,12 +120,9 @@
       if (cu_->target64) {
         return As64BitReg(TargetReg32(symbolic_reg));
       } else {
-        if (symbolic_reg >= kFArg0 && symbolic_reg <= kFArg3) {
-          // We want an XMM, not a pair.
-          return As64BitReg(TargetReg32(symbolic_reg));
-        }
         // x86: construct a pair.
         DCHECK((kArg0 <= symbolic_reg && symbolic_reg < kArg3) ||
+               (kFArg0 <= symbolic_reg && symbolic_reg < kFArg3) ||
                (kRet0 == symbolic_reg));
         return RegStorage::MakeRegPair(TargetReg32(symbolic_reg),
                                  TargetReg32(static_cast<SpecialTargetRegister>(symbolic_reg + 1)));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 0337096..bc64aad 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -180,10 +180,10 @@
   RegStorage::InvalidReg(),  // kArg5
   RegStorage::InvalidReg(),  // kArg6
   RegStorage::InvalidReg(),  // kArg7
-  rs_fr0,                    // kFArg0
-  rs_fr1,                    // kFArg1
-  rs_fr2,                    // kFArg2
-  rs_fr3,                    // kFArg3
+  rs_rAX,                    // kFArg0
+  rs_rCX,                    // kFArg1
+  rs_rDX,                    // kFArg2
+  rs_rBX,                    // kFArg3
   RegStorage::InvalidReg(),  // kFArg4
   RegStorage::InvalidReg(),  // kFArg5
   RegStorage::InvalidReg(),  // kFArg6
@@ -200,7 +200,7 @@
   rs_rDX,                    // kRet1
   rs_rAX,                    // kInvokeTgt
   rs_rAX,                    // kHiddenArg - used to hold the method index before copying to fr0.
-  rs_fr7,                    // kHiddenFpArg
+  rs_fr0,                    // kHiddenFpArg
   rs_rCX,                    // kCount
 };
 
@@ -545,13 +545,13 @@
   LockTemp(TargetReg32(kArg1));
   LockTemp(TargetReg32(kArg2));
   LockTemp(TargetReg32(kArg3));
-  LockTemp(TargetReg32(kFArg0));
-  LockTemp(TargetReg32(kFArg1));
-  LockTemp(TargetReg32(kFArg2));
-  LockTemp(TargetReg32(kFArg3));
   if (cu_->target64) {
     LockTemp(TargetReg32(kArg4));
     LockTemp(TargetReg32(kArg5));
+    LockTemp(TargetReg32(kFArg0));
+    LockTemp(TargetReg32(kFArg1));
+    LockTemp(TargetReg32(kFArg2));
+    LockTemp(TargetReg32(kFArg3));
     LockTemp(TargetReg32(kFArg4));
     LockTemp(TargetReg32(kFArg5));
     LockTemp(TargetReg32(kFArg6));
@@ -566,13 +566,13 @@
   FreeTemp(TargetReg32(kArg2));
   FreeTemp(TargetReg32(kArg3));
   FreeTemp(TargetReg32(kHiddenArg));
-  FreeTemp(TargetReg32(kFArg0));
-  FreeTemp(TargetReg32(kFArg1));
-  FreeTemp(TargetReg32(kFArg2));
-  FreeTemp(TargetReg32(kFArg3));
   if (cu_->target64) {
     FreeTemp(TargetReg32(kArg4));
     FreeTemp(TargetReg32(kArg5));
+    FreeTemp(TargetReg32(kFArg0));
+    FreeTemp(TargetReg32(kFArg1));
+    FreeTemp(TargetReg32(kFArg2));
+    FreeTemp(TargetReg32(kFArg3));
     FreeTemp(TargetReg32(kFArg4));
     FreeTemp(TargetReg32(kFArg5));
     FreeTemp(TargetReg32(kFArg6));
@@ -2460,23 +2460,14 @@
 RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
   const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
   const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3};
-  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
 
   RegStorage result = RegStorage::InvalidReg();
-  if (arg.IsFP()) {
-    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
-                             arg.IsWide() ? kWide : kNotWide);
-    }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                               arg.IsRef() ? kRef : kNotWide);
-      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-        result = RegStorage::MakeRegPair(
-            result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
-      }
+  if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                          arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = RegStorage::MakeRegPair(
+          result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
     }
   }
   return result;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7dea09a..bc4cb5a 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -57,15 +57,15 @@
  * x86-64/x32 gs: holds it.
  *
  * For floating point we don't support CPUs without SSE2 support (ie newer than PIII):
- *  Native: x86  | x86-64 / x32 | ART x86                          | ART x86-64
- *  XMM0: caller | caller, arg1 | caller, arg1, float return value | caller, arg1, float return value
- *  XMM1: caller | caller, arg2 | caller, arg2, scratch            | caller, arg2, scratch
- *  XMM2: caller | caller, arg3 | caller, arg3, scratch            | caller, arg3, scratch
- *  XMM3: caller | caller, arg4 | caller, arg4, scratch            | caller, arg4, scratch
- *  XMM4: caller | caller, arg5 | caller, scratch                  | caller, arg5, scratch
- *  XMM5: caller | caller, arg6 | caller, scratch                  | caller, arg6, scratch
- *  XMM6: caller | caller, arg7 | caller, scratch                  | caller, arg7, scratch
- *  XMM7: caller | caller, arg8 | caller, scratch                  | caller, arg8, scratch
+ *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
+ *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
+ *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
+ *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
+ *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
+ *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
+ *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
+ *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
+ *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
  *  ---  x86-64/x32 registers
  *  XMM8 .. 11: caller save available as scratch registers for ART.
  *  XMM12 .. 15: callee save available as promoted registers for ART.
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index fc72e88..a5686e1 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -77,34 +77,12 @@
 }
 
 bool X86ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  // We assume all parameters are on stack, args coming via registers are spilled as entry_spills.
-  return true;
+  return true;  // Everything is passed by stack
 }
 
 ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() {
-  ManagedRegister res = ManagedRegister::NoRegister();
-  if (!IsCurrentParamAFloatOrDouble()) {
-    switch (gpr_arg_count_) {
-      case 0: res = X86ManagedRegister::FromCpuRegister(ECX); break;
-      case 1: res = X86ManagedRegister::FromCpuRegister(EDX); break;
-      case 2: res = X86ManagedRegister::FromCpuRegister(EBX); break;
-    }
-  } else if (itr_float_and_doubles_ < 4) {
-    // First four float parameters are passed via XMM0..XMM3
-    res = X86ManagedRegister::FromXmmRegister(
-                                 static_cast<XmmRegister>(XMM0 + itr_float_and_doubles_));
-  }
-  return res;
-}
-
-ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamHighLongRegister() {
-  ManagedRegister res = ManagedRegister::NoRegister();
-  DCHECK(IsCurrentParamALong());
-  switch (gpr_arg_count_) {
-    case 0: res = X86ManagedRegister::FromCpuRegister(EDX); break;
-    case 1: res = X86ManagedRegister::FromCpuRegister(EBX); break;
-  }
-  return res;
+  LOG(FATAL) << "Should not reach here";
+  return ManagedRegister::NoRegister();
 }
 
 FrameOffset X86ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
@@ -117,32 +95,15 @@
   // We spill the argument registers on X86 to free them up for scratch use, we then assume
   // all arguments are on the stack.
   if (entry_spills_.size() == 0) {
-    ResetIterator(FrameOffset(0));
-    while (HasNext()) {
-      ManagedRegister in_reg = CurrentParamRegister();
-      if (!in_reg.IsNoRegister()) {
-        int32_t size = IsParamADouble(itr_args_) ? 8 : 4;
-        int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
-        ManagedRegisterSpill spill(in_reg, size, spill_offset);
-        entry_spills_.push_back(spill);
-        if (IsCurrentParamALong() && !IsCurrentParamAReference()) {  // Long.
-          // special case, as we may need a second register here.
-          in_reg = CurrentParamHighLongRegister();
-          if (!in_reg.IsNoRegister()) {
-            // We have to spill the second half of the long.
-            ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
-            entry_spills_.push_back(spill2);
-            // Long was allocated in 2 registers.
-            gpr_arg_count_++;
-          }
-        }
-
-        // Keep track of the number of GPRs allocated.
-        if (!IsCurrentParamAFloatOrDouble()) {
-          gpr_arg_count_++;
+    size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
+    if (num_spills > 0) {
+      entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(ECX));
+      if (num_spills > 1) {
+        entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EDX));
+        if (num_spills > 2) {
+          entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EBX));
         }
       }
-      Next();
     }
   }
   return entry_spills_;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index b1b3598..025eb6d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -28,8 +28,7 @@
  public:
   explicit X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized,
                                               const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize),
-        gpr_arg_count_(0) {}
+      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
   ~X86ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -41,10 +40,7 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
   const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
-
  private:
-  int gpr_arg_count_;
-  ManagedRegister CurrentParamHighLongRegister();
   ManagedRegisterEntrySpills entry_spills_;
   DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 66f1d5e..c0fdcaa 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -36,9 +36,8 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
-static constexpr size_t kRuntimeParameterFpuRegistersLength =
-    arraysize(kRuntimeParameterFpuRegisters);
+static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
 static constexpr int kC2ConditionMask = 0x400;
 
@@ -505,49 +504,30 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
     case Primitive::kPrimNot: {
       uint32_t index = gp_index_++;
-      stack_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
       } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble: {
       uint32_t index = gp_index_;
       gp_index_ += 2;
-      stack_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
             calling_convention.GetRegisterPairAt(index));
         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
       } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
-        // stack_index_ is the right offset for the memory.
-        return Location::QuickParameter(index, stack_index_ - 2);
+        // On X86, the register index and stack index of a quick parameter is the same, since
+        // we are passing floating pointer values in core registers.
+        return Location::QuickParameter(index, index);
       } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
-      }
-    }
-
-    case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
-      stack_index_++;
-      if (index < calling_convention.GetNumberOfFpuRegisters()) {
-        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
-      } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
-      }
-    }
-
-    case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
-      stack_index_ += 2;
-      if (index < calling_convention.GetNumberOfFpuRegisters()) {
-        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
-      } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -1206,7 +1186,7 @@
 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
   HandleInvoke(invoke);
   // Add the hidden argument.
-  invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
+  invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM0));
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -1408,17 +1388,31 @@
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
           break;
 
-        case Primitive::kPrimFloat:
-        case Primitive::kPrimDouble: {
-          // Processing a Dex `float-to-long' or 'double-to-long' instruction.
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
           InvokeRuntimeCallingConvention calling_convention;
-          XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
-          locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
-
+          // Note that on x86 floating-point parameters are passed
+          // through core registers (here, EAX).
+          locations->SetInAt(0, Location::RegisterLocation(
+              calling_convention.GetRegisterAt(0)));
           // The runtime helper puts the result in EAX, EDX.
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+          break;
         }
-        break;
+
+        case Primitive::kPrimDouble: {
+          // Processing a Dex `double-to-long' instruction.
+          InvokeRuntimeCallingConvention calling_convention;
+          // Note that on x86 floating-point parameters are passed
+          // through core registers (here, EAX and ECX).
+          locations->SetInAt(0, Location::RegisterPairLocation(
+              calling_convention.GetRegisterAt(0),
+              calling_convention.GetRegisterAt(1)));
+          // The runtime helper puts the result in EAX, EDX.
+          locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+          break;
+        }
+          break;
 
         default:
           LOG(FATAL) << "Unexpected type conversion from " << input_type
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 55d71e3..73b647c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -36,8 +36,8 @@
 static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
-static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+static constexpr XmmRegister kParameterFpuRegisters[] = { };
+static constexpr size_t kParameterFpuRegistersLength = 0;
 
 class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
@@ -58,18 +58,13 @@
 
 class InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitor() : gp_index_(0) {}
 
   Location GetNextLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
   uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1f0dba5..3f266fe 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1537,12 +1537,8 @@
 
   uint32_t reg_offset = 1;
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
-    DCHECK(spill.IsCpuRegister());
-    pushl(spill.AsCpuRegister());
-    gpr_count++;
+    pushl(spill_regs.at(i).AsX86().AsCpuRegister());
 
     // DW_CFA_advance_loc
     DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
@@ -1556,7 +1552,7 @@
   }
 
   // return address then method on stack
-  int32_t adjust = frame_size - (gpr_count * kFramePointerSize) -
+  int32_t adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
                    sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
                    kFramePointerSize /*return address*/;
   addl(ESP, Immediate(-adjust));
@@ -1576,18 +1572,9 @@
   DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86().IsCpuRegister()) {
-      movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
+    movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) +
+                 (i * kFramePointerSize)),
+         entry_spills.at(i).AsX86().AsCpuRegister());
   }
 }
 
@@ -1597,9 +1584,7 @@
   addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
                       sizeof(StackReference<mirror::ArtMethod>)));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
-    DCHECK(spill.IsCpuRegister());
-    popl(spill.AsCpuRegister());
+    popl(spill_regs.at(i).AsX86().AsCpuRegister());
   }
   ret();
 }
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 6acc2a7..4b67c83 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -302,7 +302,7 @@
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "movd %[hidden], %%xmm7\n\t"
+        "movd %[hidden], %%xmm0\n\t"
         "subl $12, %%esp\n\t"       // Align stack.
         "pushl %[referrer]\n\t"     // Store referrer
         "call *%%edi\n\t"           // Call the stub
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index b0a6017..5a88f80 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -21,8 +21,6 @@
 
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-
-// 32 bytes for GPRs and 32 bytes for FPRs.
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32)
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 32
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 4ea4684..2a6ff14 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -30,9 +30,6 @@
   for (size_t  i = 0; i < kNumberOfCpuRegisters; i++) {
     gprs_[i] = nullptr;
   }
-  for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
-    fprs_[i] = nullptr;
-  }
   gprs_[ESP] = &esp_;
   // Initialize registers with easy to spot debug values.
   esp_ = X86Context::kBadGprBase + ESP;
@@ -43,7 +40,7 @@
   mirror::ArtMethod* method = fr.GetMethod();
   const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
   size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
-  size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask());
+  DCHECK_EQ(frame_info.FpSpillMask(), 0u);
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 2;  // Offset j to skip return address spill.
@@ -54,24 +51,6 @@
       }
     }
   }
-  if (fp_spill_count > 0) {
-    // Lowest number spill is farthest away, walk registers and fill into context.
-    size_t j = 2;  // Offset j to skip return address spill.
-    size_t fp_spill_size_in_words = fp_spill_count * 2;
-    for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
-      if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
-        // There are 2 pieces to each XMM register, to match VR size.
-        fprs_[2*i] = reinterpret_cast<uint32_t*>(
-            fr.CalleeSaveAddress(spill_count + fp_spill_size_in_words - j,
-                                 frame_info.FrameSizeInBytes()));
-        fprs_[2*i+1] = reinterpret_cast<uint32_t*>(
-            fr.CalleeSaveAddress(spill_count + fp_spill_size_in_words - j - 1,
-                                 frame_info.FrameSizeInBytes()));
-        // Two void* per XMM register.
-        j += 2;
-      }
-    }
-  }
 }
 
 void X86Context::SmashCallerSaves() {
@@ -80,7 +59,6 @@
   gprs_[EDX] = const_cast<uintptr_t*>(&gZero);
   gprs_[ECX] = nullptr;
   gprs_[EBX] = nullptr;
-  memset(&fprs_[0], '\0', sizeof(fprs_));
 }
 
 void X86Context::SetGPR(uint32_t reg, uintptr_t value) {
@@ -90,11 +68,14 @@
   *gprs_[reg] = value;
 }
 
-void X86Context::SetFPR(uint32_t reg, uintptr_t value) {
-  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
-  DCHECK(IsAccessibleFPR(reg));
-  CHECK_NE(fprs_[reg], reinterpret_cast<const uint32_t*>(&gZero));
-  *fprs_[reg] = value;
+uintptr_t X86Context::GetFPR(uint32_t reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Floating-point registers are all caller save in X86";
+  UNREACHABLE();
+}
+
+void X86Context::SetFPR(uint32_t reg ATTRIBUTE_UNUSED, uintptr_t value ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Floating-point registers are all caller save in X86";
+  UNREACHABLE();
 }
 
 void X86Context::DoLongJump() {
@@ -105,30 +86,17 @@
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
     gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86Context::kBadGprBase + i;
   }
-  uint32_t fprs[kNumberOfFloatRegisters];
-  for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
-    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86Context::kBadFprBase + i;
-  }
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - sizeof(intptr_t);
   gprs[kNumberOfCpuRegisters] = esp;
   *(reinterpret_cast<uintptr_t*>(esp)) = eip_;
   __asm__ __volatile__(
-      "movl %1, %%ebx\n\t"          // Address base of FPRs.
-      "movsd 0(%%ebx), %%xmm0\n\t"  // Load up XMM0-XMM7.
-      "movsd 8(%%ebx), %%xmm1\n\t"
-      "movsd 16(%%ebx), %%xmm2\n\t"
-      "movsd 24(%%ebx), %%xmm3\n\t"
-      "movsd 32(%%ebx), %%xmm4\n\t"
-      "movsd 40(%%ebx), %%xmm5\n\t"
-      "movsd 48(%%ebx), %%xmm6\n\t"
-      "movsd 56(%%ebx), %%xmm7\n\t"
       "movl %0, %%esp\n\t"  // ESP points to gprs.
       "popal\n\t"           // Load all registers except ESP and EIP with values in gprs.
       "popl %%esp\n\t"      // Load stack pointer.
       "ret\n\t"             // From higher in the stack pop eip.
       :  // output.
-      : "g"(&gprs[0]), "g"(&fprs[0]) // input.
+      : "g"(&gprs[0])  // input.
       :);  // clobber.
 #else
   UNIMPLEMENTED(FATAL);
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c66a9dc..8b7804d 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -61,16 +61,11 @@
 
   void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
-    return fprs_[reg] != nullptr;
+  bool IsAccessibleFPR(uint32_t reg ATTRIBUTE_UNUSED) OVERRIDE {
+    return false;
   }
 
-  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
-    DCHECK(IsAccessibleFPR(reg));
-    return *fprs_[reg];
-  }
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE;
 
   void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
@@ -78,22 +73,9 @@
   void DoLongJump() OVERRIDE;
 
  private:
-  // Pretend XMM registers are made of uin32_t pieces, because they are manipulated
-  // in uint32_t chunks.
-  enum {
-    XMM0_0 = 0, XMM0_1,
-    XMM1_0, XMM1_1,
-    XMM2_0, XMM2_1,
-    XMM3_0, XMM3_1,
-    XMM4_0, XMM4_1,
-    XMM5_0, XMM5_1,
-    XMM6_0, XMM6_1,
-    XMM7_0, XMM7_1,
-    kNumberOfFloatRegisters};
-
-  // Pointers to register locations. Values are initialized to NULL or the special registers below.
+  // Pointers to register locations, floating point registers are all caller save. Values are
+  // initialized to NULL or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
-  uint32_t* fprs_[kNumberOfFloatRegisters];
   // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index b245dc3..7153403 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -90,15 +90,6 @@
     PUSH ebx  // Save args
     PUSH edx
     PUSH ecx
-    // Create space for FPR args.
-    subl MACRO_LITERAL(4 * 8), %esp
-    CFI_ADJUST_CFA_OFFSET(4 * 8)
-    // Save FPRs.
-    movsd %xmm0, 0(%esp)
-    movsd %xmm1, 8(%esp)
-    movsd %xmm2, 16(%esp)
-    movsd %xmm3, 24(%esp)
-
     SETUP_GOT_NOSAVE VAR(got_reg, 0)
     // Load Runtime::instance_ from GOT.
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
@@ -111,7 +102,7 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4*8 + 4)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86) size not as expected."
 #endif
 END_MACRO
@@ -121,39 +112,20 @@
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX.
      */
 MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX)
-    // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH edi  // Save callee saves
     PUSH esi
     PUSH ebp
     PUSH ebx  // Save args
     PUSH edx
     PUSH ecx
-
-    // Create space for FPR args.
-    subl MACRO_LITERAL(32), %esp
-    CFI_ADJUST_CFA_OFFSET(32)
-
-    // Save FPRs.
-    movsd %xmm0, 0(%esp)
-    movsd %xmm1, 8(%esp)
-    movsd %xmm2, 16(%esp)
-    movsd %xmm3, 24(%esp)
-
     PUSH eax  // Store the ArtMethod reference at the bottom of the stack.
     // Store esp as the stop quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
 MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
-    // Restore FPRs. EAX is still on the stack.
-    movsd 4(%esp), %xmm0
-    movsd 12(%esp), %xmm1
-    movsd 20(%esp), %xmm2
-    movsd 28(%esp), %xmm3
-
-    addl MACRO_LITERAL(36), %esp  // Remove FPRs and EAX.
-    CFI_ADJUST_CFA_OFFSET(-36)
-
+    addl MACRO_LITERAL(4), %esp  // Remove padding
+    CFI_ADJUST_CFA_OFFSET(-4)
     POP ecx  // Restore args except eax
     POP edx
     POP ebx
@@ -162,30 +134,6 @@
     POP edi
 END_MACRO
 
-// Restore register and jump to routine
-// Inputs:  EDI contains pointer to code.
-// Notes: Need to pop EAX too (restores Method*)
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP)
-    POP eax  // Restore Method*
-
-    // Restore FPRs.
-    movsd 0(%esp), %xmm0
-    movsd 8(%esp), %xmm1
-    movsd 16(%esp), %xmm2
-    movsd 24(%esp), %xmm3
-
-    addl MACRO_LITERAL(32), %esp  // Remove FPRs.
-    CFI_ADJUST_CFA_OFFSET(-32)
-
-    POP ecx  // Restore args except eax
-    POP edx
-    POP ebx
-    POP ebp  // Restore callee saves
-    POP esi
-    xchgl 0(%esp),%edi // restore EDI and place code pointer as only value on stack
-    ret
-END_MACRO
-
     /*
      * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
@@ -295,14 +243,13 @@
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
-
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp  // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    pushl 32+32(%edx)             // pass caller Method*
+    pushl 32(%edx)                // pass caller Method*
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
@@ -310,17 +257,6 @@
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(36), %esp  // Pop arguments skip eax
     CFI_ADJUST_CFA_OFFSET(-36)
-
-    // Restore FPRs.
-    movsd 0(%esp), %xmm0
-    movsd 8(%esp), %xmm1
-    movsd 16(%esp), %xmm2
-    movsd 24(%esp), %xmm3
-
-    // Remove space for FPR args.
-    addl MACRO_LITERAL(4 * 8), %esp
-    CFI_ADJUST_CFA_OFFSET(-4 * 8)
-
     POP ecx  // Restore args except eax
     POP edx
     POP ebx
@@ -348,63 +284,7 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Helper for quick invocation stub to set up XMM registers. Assumes EBX == shorty,
-     * ECX == arg_array. Clobbers EBX, ECX and al. Branches to xmm_setup_finished if it encounters
-     * the end of the shorty.
-     */
-MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
-1: // LOOP
-    movb (%ebx), %al              // al := *shorty
-    addl MACRO_LITERAL(1), %ebx   // shorty++
-    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
-    je RAW_VAR(finished, 1)
-    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
-    je 2f
-    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
-    je 3f
-    addl MACRO_LITERAL(4), %ecx   // arg_array++
-    //  Handle extra space in arg array taken by a long.
-    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
-    jne 1b
-    addl MACRO_LITERAL(4), %ecx   // arg_array++
-    jmp 1b                        // goto LOOP
-2:  // FOUND_DOUBLE
-    movsd (%ecx), REG_VAR(xmm_reg, 0)
-    addl MACRO_LITERAL(8), %ecx   // arg_array+=2
-    jmp 4f
-3:  // FOUND_FLOAT
-    movss (%ecx), REG_VAR(xmm_reg, 0)
-    addl MACRO_LITERAL(4), %ecx   // arg_array++
-4:
-END_MACRO
-
-    /*
-     * Helper for quick invocation stub to set up GPR registers. Assumes ESI == shorty,
-     * EDI == arg_array. Clobbers ESI, EDI and al. Branches to gpr_setup_finished if it encounters
-     * the end of the shorty.
-     */
-MACRO1(SKIP_OVER_FLOATS, finished)
-1: // LOOP
-    movb (%esi), %al              // al := *shorty
-    addl MACRO_LITERAL(1), %esi   // shorty++
-    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
-    je RAW_VAR(finished, 0)
-    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
-    je 3f
-    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
-    je 4f
-    jmp 5f
-3:  // SKIP_FLOAT
-    addl MACRO_LITERAL(4), %edi   // arg_array++
-    jmp 1b
-4:  // SKIP_DOUBLE
-    addl MACRO_LITERAL(8), %edi   // arg_array+=2
-    jmp 1b
-5:
-END_MACRO
-
-  /*
-     * Quick invocation stub (non-static).
+     * Quick invocation stub.
      * On entry:
      *   [sp] = return address
      *   [sp + 4] = method pointer
@@ -415,17 +295,6 @@
      *   [sp + 24] = shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
-    // Set up argument XMM registers.
-    mov 24(%esp), %ebx           // EBX := shorty + 1  ; ie skip return arg character.
-    addl LITERAL(1), %ebx
-    mov 8(%esp), %ecx            // ECX := arg_array + 4 ; ie skip this pointer.
-    addl LITERAL(4), %ecx
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
-    .balign 16
-.Lxmm_setup_finished:
     PUSH ebp                      // save ebp
     PUSH ebx                      // save ebx
     PUSH esi                      // save esi
@@ -439,41 +308,18 @@
     andl LITERAL(0xFFFFFFF0), %ebx
     subl LITERAL(20), %ebx        // remove space for return address, ebx, ebp, esi and edi
     subl %ebx, %esp               // reserve stack space for argument array
-
+    SETUP_GOT_NOSAVE ebx          // clobbers ebx (harmless here)
+    lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
+    pushl 28(%ebp)                // push size of region to memcpy
+    pushl 24(%ebp)                // push arg array as source of memcpy
+    pushl %eax                    // push stack pointer as destination of memcpy
+    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
+    addl LITERAL(12), %esp        // pop arguments to memcpy
     movl LITERAL(0), (%esp)       // store NULL for method*
-
-    // Copy arg array into stack.
-    movl 28(%ebp), %ecx           // ECX = size of args
-    movl 24(%ebp), %esi           // ESI = argument array
-    leal 4(%esp), %edi            // EDI = just after Method* in stack arguments
-    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
-
-    mov 40(%ebp), %esi            // ESI := shorty + 1  ; ie skip return arg character.
-    addl LITERAL(1), %esi
-    mov 24(%ebp), %edi            // EDI := arg_array
-    mov 0(%edi), %ecx             // ECX := this pointer
-    addl LITERAL(4), %edi         // EDI := arg_array + 4 ; ie skip this pointer.
-
-    // Enumerate the possible cases for loading GPRS.
-    // edx (and maybe ebx):
-    SKIP_OVER_FLOATS .Lgpr_setup_finished
-    cmpb LITERAL(74), %al         // if (al == 'J') goto FOUND_LONG
-    je .LfirstLong
-    // Must be an integer value.
-    movl (%edi), %edx
-    addl LITERAL(4), %edi         // arg_array++
-
-    // Now check ebx
-    SKIP_OVER_FLOATS .Lgpr_setup_finished
-    // Must be first word of a long, or an integer.
-    movl (%edi), %ebx
-    jmp .Lgpr_setup_finished
-.LfirstLong:
-    movl (%edi), %edx
-    movl 4(%edi), %ebx
-    // Nothing left to load.
-.Lgpr_setup_finished:
     mov 20(%ebp), %eax            // move method pointer into eax
+    mov 4(%esp), %ecx             // copy arg1 into ecx
+    mov 8(%esp), %edx             // copy arg2 into edx
+    mov 12(%esp), %ebx            // copy arg3 into ebx
     call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     CFI_DEF_CFA_REGISTER(esp)
@@ -498,119 +344,6 @@
     ret
 END_FUNCTION art_quick_invoke_stub
 
-  /*
-     * Quick invocation stub (static).
-     * On entry:
-     *   [sp] = return address
-     *   [sp + 4] = method pointer
-     *   [sp + 8] = argument array or NULL for no argument methods
-     *   [sp + 12] = size of argument array in bytes
-     *   [sp + 16] = (managed) thread pointer
-     *   [sp + 20] = JValue* result
-     *   [sp + 24] = shorty
-     */
-DEFINE_FUNCTION art_quick_invoke_static_stub
-    // Set up argument XMM registers.
-    mov 24(%esp), %ebx           // EBX := shorty + 1  ; ie skip return arg character.
-    addl LITERAL(1), %ebx
-    mov 8(%esp), %ecx            // ECX := arg_array
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
-    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
-    .balign 16
-.Lxmm_setup_finished2:
-    PUSH ebp                      // save ebp
-    PUSH ebx                      // save ebx
-    PUSH esi                      // save esi
-    PUSH edi                      // save edi
-    mov %esp, %ebp                // copy value of stack pointer into base pointer
-    CFI_DEF_CFA_REGISTER(ebp)
-    mov 28(%ebp), %ebx            // get arg array size
-    // reserve space for return addr, method*, ebx, ebp, esi, and edi in frame
-    addl LITERAL(36), %ebx
-    // align frame size to 16 bytes
-    andl LITERAL(0xFFFFFFF0), %ebx
-    subl LITERAL(20), %ebx        // remove space for return address, ebx, ebp, esi and edi
-    subl %ebx, %esp               // reserve stack space for argument array
-
-    movl LITERAL(0), (%esp)       // store NULL for method*
-
-    // Copy arg array into stack.
-    movl 28(%ebp), %ecx           // ECX = size of args
-    movl 24(%ebp), %esi           // ESI = argument array
-    leal 4(%esp), %edi            // EDI = just after Method* in stack arguments
-    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
-
-    mov 40(%ebp), %esi            // ESI := shorty + 1  ; ie skip return arg character.
-    addl LITERAL(1), %esi
-    mov 24(%ebp), %edi            // EDI := arg_array
-
-    // Enumerate the possible cases for loading GPRS.
-    // ecx (and maybe edx)
-    SKIP_OVER_FLOATS .Lgpr_setup_finished2
-    cmpb LITERAL(74), %al         // if (al == 'J') goto FOUND_LONG
-    je .LfirstLong2
-    // Must be an integer value.  Load into ECX.
-    movl (%edi), %ecx
-    addl LITERAL(4), %edi         // arg_array++
-
-    // Now check edx (and maybe ebx).
-    SKIP_OVER_FLOATS .Lgpr_setup_finished2
-    cmpb LITERAL(74), %al         // if (al == 'J') goto FOUND_LONG
-    je .LSecondLong2
-    // Must be an integer.  Load into EDX.
-    movl (%edi), %edx
-    addl LITERAL(4), %edi         // arg_array++
-
-    // Is there anything for ebx?
-    SKIP_OVER_FLOATS .Lgpr_setup_finished2
-    // First word of long or integer.  Load into EBX.
-    movl (%edi), %ebx
-    jmp .Lgpr_setup_finished2
-.LSecondLong2:
-    // EDX:EBX is long.  That is all.
-    movl (%edi), %edx
-    movl 4(%edi), %ebx
-    jmp .Lgpr_setup_finished2
-.LfirstLong2:
-    // ECX:EDX is a long
-    movl (%edi), %ecx
-    movl 4(%edi), %edx
-    addl LITERAL(8), %edi         // arg_array += 2
-
-    // Anything for EBX?
-    SKIP_OVER_FLOATS .Lgpr_setup_finished2
-    // First word of long or integer.  Load into EBX.
-    movl (%edi), %ebx
-    jmp .Lgpr_setup_finished2
-    // Nothing left to load.
-.Lgpr_setup_finished2:
-    mov 20(%ebp), %eax            // move method pointer into eax
-    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32(%eax) // call the method
-    mov %ebp, %esp                // restore stack pointer
-    CFI_DEF_CFA_REGISTER(esp)
-    POP edi                       // pop edi
-    POP esi                       // pop esi
-    POP ebx                       // pop ebx
-    POP ebp                       // pop ebp
-    mov 20(%esp), %ecx            // get result pointer
-    mov %eax, (%ecx)              // store the result assuming its a long, int or Object*
-    mov %edx, 4(%ecx)             // store the other half of the result
-    mov 24(%esp), %edx            // get the shorty
-    cmpb LITERAL(68), (%edx)      // test if result type char == 'D'
-    je .Lreturn_double_quick2
-    cmpb LITERAL(70), (%edx)      // test if result type char == 'F'
-    je .Lreturn_float_quick2
-    ret
-.Lreturn_double_quick2:
-    movsd %xmm0, (%ecx)           // store the floating point result
-    ret
-.Lreturn_float_quick2:
-    movss %xmm0, (%ecx)           // store the floating point result
-    ret
-END_FUNCTION art_quick_invoke_static_stub
-
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
@@ -1109,20 +842,20 @@
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
 DEFINE_FUNCTION art_quick_d2l
-    subl LITERAL(12), %esp        // alignment padding, room for argument
-    CFI_ADJUST_CFA_OFFSET(12)
-    movsd %xmm0, 0(%esp)          // arg a
-    call SYMBOL(art_d2l)          // (jdouble a)
+    PUSH eax                      // alignment padding
+    PUSH ecx                      // pass arg2 a.hi
+    PUSH eax                      // pass arg1 a.lo
+    call SYMBOL(art_d2l)      // (jdouble a)
     addl LITERAL(12), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_d2l
 
 DEFINE_FUNCTION art_quick_f2l
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    movss %xmm0, 0(%esp)          // arg a
-    call SYMBOL(art_f2l)          // (jfloat a)
+    subl LITERAL(8), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH eax                      // pass arg1 a
+    call SYMBOL(art_f2l)      // (jfloat a)
     addl LITERAL(12), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
@@ -1282,8 +1015,8 @@
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-    addl LITERAL(76), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-76)
+    addl LITERAL(44), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-44)
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
@@ -1295,7 +1028,7 @@
     PUSH ecx
     movl 8(%esp), %eax            // load caller Method*
     movl MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET(%eax), %eax  // load dex_cache_resolved_methods
-    movd %xmm7, %ecx              // get target method index stored in xmm0
+    movd %xmm0, %ecx              // get target method index stored in xmm0
     movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax  // load the target method
     POP ecx
     jmp SYMBOL(art_quick_invoke_interface_trampoline)
@@ -1314,7 +1047,14 @@
     addl LITERAL(16), %esp        // pop arguments
     test %eax, %eax               // if code pointer is NULL goto deliver pending exception
     jz 1f
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP
+    POP eax                       // called method
+    POP ecx                       // restore args
+    POP edx
+    POP ebx
+    POP ebp                       // restore callee saves except EDI
+    POP esi
+    xchgl 0(%esp),%edi            // restore EDI and place code pointer as only value on stack
+    ret                           // tail call into method
 1:
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
@@ -1348,6 +1088,7 @@
     movl %edx, %esp
 
     // On x86 there are no registers passed, so nothing to pop here.
+
     // Native call.
     call *%eax
 
@@ -1374,10 +1115,8 @@
     jnz .Lexception_in_native
 
     // Tear down the callee-save frame.
-    // Remove space for FPR args and EAX
-    addl LITERAL(4 + 4 * 8), %esp
-    CFI_ADJUST_CFA_OFFSET(-(4 + 4 * 8))
-
+    addl LITERAL(4), %esp     // Remove padding
+    CFI_ADJUST_CFA_OFFSET(-4)
     POP ecx
     addl LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
     CFI_ADJUST_CFA_OFFSET(-4)
@@ -1407,21 +1146,12 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass  method
     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
-    addl LITERAL(16), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-16)
-
-    // Return eax:edx in xmm0 also.
-    movd %eax, %xmm0
+    movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-
-    addl LITERAL(48), %esp        // Remove FPRs and EAX, ECX, EDX, EBX.
-    CFI_ADJUST_CFA_OFFSET(-48)
-
-    POP ebp  // Restore callee saves
-    POP esi
-    POP edi
-
+    addl LITERAL(16), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
 
@@ -1441,25 +1171,18 @@
     PUSH eax                      // Pass Method*.
     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
     addl LITERAL(28), %esp        // Pop arguments upto saved Method*.
-    movl 60(%esp), %edi           // Restore edi.
-    movl %eax, 60(%esp)           // Place code* over edi, just under return pc.
+    movl 28(%esp), %edi           // Restore edi.
+    movl %eax, 28(%esp)           // Place code* over edi, just under return pc.
     movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
     // Place instrumentation exit as return pc. ebx holds the GOT computed on entry.
-    movl %ebx, 64(%esp)
-    movl 0(%esp), %eax           // Restore eax.
-    // Restore FPRs (extra 4 bytes of offset due to EAX push at top).
-    movsd 8(%esp), %xmm0
-    movsd 16(%esp), %xmm1
-    movsd 24(%esp), %xmm2
-    movsd 32(%esp), %xmm3
-
-    // Restore GPRs.
-    movl 40(%esp), %ecx           // Restore ecx.
-    movl 48(%esp), %edx           // Restore edx.
-    movl 48(%esp), %ebx           // Restore ebx.
-    movl 52(%esp), %ebp           // Restore ebp.
-    movl 56(%esp), %esi           // Restore esi.
-    addl LITERAL(60), %esp        // Wind stack back upto code*.
+    movl %ebx, 32(%esp)
+    movl (%esp), %eax             // Restore eax.
+    movl 8(%esp), %ecx            // Restore ecx.
+    movl 12(%esp), %edx           // Restore edx.
+    movl 16(%esp), %ebx           // Restore ebx.
+    movl 20(%esp), %ebp           // Restore ebp.
+    movl 24(%esp), %esi           // Restore esi.
+    addl LITERAL(28), %esp        // Wind stack back upto code*.
     ret                           // Call method (and pop).
 END_FUNCTION art_quick_instrumentation_entry
 
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index 9bba531..b9dc0d8 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -24,44 +24,25 @@
 namespace art {
 namespace x86 {
 
-enum XMM {
-  XMM0 = 0,
-  XMM1 = 1,
-  XMM2 = 2,
-  XMM3 = 3,
-  XMM4 = 4,
-  XMM5 = 5,
-  XMM6 = 6,
-  XMM7 = 7,
-};
-
 static constexpr uint32_t kX86CalleeSaveRefSpills =
     (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
 static constexpr uint32_t kX86CalleeSaveArgSpills =
     (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
-static constexpr uint32_t kX86CalleeSaveFpArgSpills =
-    (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
-    (1 << art::x86::XMM2) | (1 << art::x86::XMM3);
 
 constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
       (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
 }
 
-constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-    return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0;
-}
-
 constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
-                  2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * kX86PointerSize, kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
   return QuickMethodFrameInfo(X86CalleeSaveFrameSize(type),
                               X86CalleeSaveCoreSpills(type),
-                              X86CalleeSaveFpSpills(type));
+                              0u);
 }
 
 }  // namespace x86
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 8ab90eb..9947b55 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -198,20 +198,16 @@
   // | EBX         |    arg3
   // | EDX         |    arg2
   // | ECX         |    arg1
-  // | XMM3        |    float arg 4
-  // | XMM2        |    float arg 3
-  // | XMM1        |    float arg 2
-  // | XMM0        |    float arg 1
   // | EAX/Method* |  <- sp
   static constexpr bool kAlignPairRegister = false;
-  static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
-  static constexpr size_t kNumQuickFprArgs = 4;  // 4 arguments passed in FPRs.
+  static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
   static constexpr bool kGprFprLockstep = false;
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 4;  // Offset of first FPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4 + 4*8;  // Offset of first GPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28 + 4*8;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28;  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index b2016dc..288f6a6 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -41,7 +41,7 @@
 
 extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                       const char*);
-#if defined(__LP64__) || defined(__arm__) || defined(__i386__)
+#if defined(__LP64__) || defined(__arm__)
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 #endif
@@ -415,7 +415,7 @@
             << "Don't call compiled code when -Xint " << PrettyMethod(this);
       }
 
-#if defined(__LP64__) || defined(__arm__) || defined(__i386__)
+#if defined(__LP64__) || defined(__arm__)
       if (!IsStatic()) {
         (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
       } else {
diff --git a/runtime/oat.h b/runtime/oat.h
index 3e28606..8e63d3a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '5', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '5', '3', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";