Revert "Revert "Better support for x86 XMM registers""

This reverts commit 8ff67e3338952c70ccf3b609559bf8cc0f379cfd.

Fix applied to loc.fp usage.

Change-Id: I1eb3005392544fcf30c595923ed25bcee2dc4859
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 4650f25..18122b3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -56,6 +56,17 @@
   kLocInvalid
 };
 
+/**
+ * Support for vector registers.  Initially used for x86 floats.  This will be used
+ * to replace the assumption that a double takes up 2 single FP registers.
+ */
+enum VectorLengthType {
+  kVectorNotUsed = 0,   // This value is NOT in a vector register.
+  kVectorLength4,       // The value occupies 4 bytes in a vector register.
+  kVectorLength8,       // The value occupies 8 bytes in a vector register.
+  kVectorLength16       // The value occupies 16 bytes in a vector register (unused now).
+};
+
 enum BBType {
   kNullBlock,
   kEntryBlock,
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 010de20..d080e39 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -325,11 +325,14 @@
   unsigned ref:1;       // Something GC cares about.
   unsigned high_word:1;  // High word of pair?
   unsigned home:1;      // Does this represent the home location?
+  VectorLengthType vec_len:3;  // Is this value in a vector register, and how big is it?
   uint8_t low_reg;      // First physical register.
   uint8_t high_reg;     // 2nd physical register (if wide).
   int16_t s_reg_low;    // SSA name for low Dalvik word.
   int16_t orig_sreg;    // TODO: remove after Bitcode gen complete
                         // and consolidate usage w/ s_reg_low.
+
+  bool IsVectorScalar() const { return vec_len == kVectorLength4 || vec_len == kVectorLength8;}
 };
 
 /*
@@ -354,7 +357,7 @@
 };
 
 
-const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
+const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, kVectorNotUsed,
                              INVALID_REG, INVALID_REG, INVALID_SREG, INVALID_SREG};
 
 class MIRGraph {
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index b06ebcf..37b4ec6 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -118,9 +118,9 @@
 #define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET-1)
 
 // RegisterLocation templates return values (r0, or r0/r1).
-#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \
+#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, INVALID_REG, \
                           INVALID_SREG, INVALID_SREG}
-#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, \
+#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, \
                                INVALID_SREG, INVALID_SREG}
 #define ARM_LOC_C_RETURN_FLOAT  ARM_LOC_C_RETURN
 #define ARM_LOC_C_RETURN_DOUBLE  ARM_LOC_C_RETURN_WIDE
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 6b4cbd4..3bd0298 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1840,4 +1840,11 @@
   CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true);
 }
 
+/* Generic code for generating a wide constant into a VR. */
+void Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
+  RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
+  LoadConstantWide(rl_result.low_reg, rl_result.high_reg, value);
+  StoreValueWide(rl_dest, rl_result);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 3539106..8f2f6ad 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -265,9 +265,17 @@
 
   // Dest is now live and dirty (until/if we flush it to home location)
   MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
-  MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
-  MarkDirty(rl_dest);
-  MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+
+  // Does this wide value live in two registers (or in one vector register)?
+  if (rl_dest.low_reg != rl_dest.high_reg) {
+    MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+    MarkDirty(rl_dest);
+    MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+  } else {
+    // This must be an x86 vector register value.
+    DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+    MarkDirty(rl_dest);
+  }
 
 
   ResetDefLocWide(rl_dest);
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 278fcef..00eef96 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -142,13 +142,13 @@
 #define rMIPS_PC INVALID_REG
 
 // RegisterLocation templates return values (r_V0, or r_V0/r_V1).
-#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_V0, INVALID_REG, \
+#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_V0, INVALID_REG, \
                            INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \
+#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \
                                  INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_RESULT0, \
+#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_RESULT0, \
                                 r_RESULT1, INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \
+#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \
                                   r_FRESULT1, INVALID_SREG, INVALID_SREG}
 
 enum MipsResourceEncodingPos {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c5bbae1..6281eff 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -157,16 +157,11 @@
 
     case Instruction::CONST_WIDE_16:
     case Instruction::CONST_WIDE_32:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantWide(rl_result.low_reg, rl_result.high_reg,
-                           static_cast<int64_t>(static_cast<int32_t>(vB)));
-      StoreValueWide(rl_dest, rl_result);
+      GenConstWide(rl_dest, static_cast<int64_t>(static_cast<int32_t>(vB)));
       break;
 
     case Instruction::CONST_WIDE:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantWide(rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide);
-      StoreValueWide(rl_dest, rl_result);
+      GenConstWide(rl_dest, mir->dalvikInsn.vB_wide);
       break;
 
     case Instruction::CONST_WIDE_HIGH16:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3f7ec1e..c157327 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -385,7 +385,7 @@
     int AllocPreservedSingle(int s_reg);
     int AllocPreservedDouble(int s_reg);
     int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
-    int AllocTempDouble();
+    virtual int AllocTempDouble();
     int AllocFreeTemp();
     int AllocTemp();
     int AllocTempFloat();
@@ -403,7 +403,7 @@
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
     RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
-    void ResetDefLocWide(RegLocation rl);
+    virtual void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
     void ClobberAllRegs();
     void FlushAllRegsBody(RegisterInfo* info, int num_regs);
@@ -419,7 +419,7 @@
     void CopyRegInfo(int new_reg, int old_reg);
     bool CheckCorePoolSanity();
     RegLocation UpdateLoc(RegLocation loc);
-    RegLocation UpdateLocWide(RegLocation loc);
+    virtual RegLocation UpdateLocWide(RegLocation loc);
     RegLocation UpdateRawLoc(RegLocation loc);
 
     /**
@@ -430,7 +430,7 @@
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register pairs.
      */
-    RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+    virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
 
     /**
      * @brief Used to load register location into a typed temporary.
@@ -439,7 +439,7 @@
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register.
      */
-    RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+    virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
 
     void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
@@ -507,6 +507,8 @@
                            RegLocation rl_src);
     void GenSuspendTest(int opt_flags);
     void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+    // This will be overridden by the x86 implementation.
+    virtual void GenConstWide(RegLocation rl_dest, int64_t value);
 
     // Shared by all targets - implemented in gen_invoke.cc.
     int CallHelperSetup(ThreadOffset helper_offset);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index cef013e..32c22f2 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -602,6 +602,7 @@
 }
 
 void Mir2Lir::MarkPair(int low_reg, int high_reg) {
+  DCHECK_NE(low_reg, high_reg);
   RegisterInfo* info_lo = GetRegInfo(low_reg);
   RegisterInfo* info_hi = GetRegInfo(high_reg);
   info_lo->pair = info_hi->pair = true;
@@ -807,7 +808,10 @@
   if (update) {
     loc.location = kLocPhysReg;
     MarkLive(loc.low_reg, loc.s_reg_low);
-    MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+    // Does this wide value live in two registers or one vector register?
+    if (loc.low_reg != loc.high_reg) {
+      MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+    }
   }
   DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
   return loc;
@@ -1059,7 +1063,10 @@
   Clobber(res.high_reg);
   LockTemp(res.low_reg);
   LockTemp(res.high_reg);
-  MarkPair(res.low_reg, res.high_reg);
+  // Does this wide value live in two registers or one vector register?
+  if (res.low_reg != res.high_reg) {
+    MarkPair(res.low_reg, res.high_reg);
+  }
   return res;
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index e6621f3..816f2d0 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -173,6 +173,12 @@
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
 
+    RegLocation UpdateLocWide(RegLocation loc);
+    RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+    RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+    int AllocTempDouble();
+    void ResetDefLocWide(RegLocation rl);
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -222,6 +228,8 @@
     void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                   int64_t val, ConditionCode ccode);
+    void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
+    void GenConstWide(RegLocation rl_dest, int64_t value);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 11ccd4b..01479a9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -155,9 +155,11 @@
       // TODO: Prevent this from happening in the code. The result is often
       // unused or could have been loaded more easily from memory.
       NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
+      dest_hi = AllocTempDouble();
       NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
       NewLIR2(kX86PsllqRI, dest_hi, 32);
       NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
+      FreeTemp(dest_hi);
     }
   } else {
     if (src_fp) {
@@ -525,7 +527,7 @@
   // Compute (r1:r0) = (r1:r0) + (r2:r3)
   OpRegReg(kOpAdd, r0, r2);  // r0 = r0 + r2
   OpRegReg(kOpAdc, r1, r3);  // r1 = r1 + r3 + CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -541,7 +543,7 @@
   // Compute (r1:r0) = (r1:r0) + (r2:r3)
   OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
   OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -557,7 +559,7 @@
   // Compute (r1:r0) = (r1:r0) & (r2:r3)
   OpRegReg(kOpAnd, r0, r2);  // r0 = r0 & r2
   OpRegReg(kOpAnd, r1, r3);  // r1 = r1 & r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -573,7 +575,7 @@
   // Compute (r1:r0) = (r1:r0) | (r2:r3)
   OpRegReg(kOpOr, r0, r2);  // r0 = r0 | r2
   OpRegReg(kOpOr, r1, r3);  // r1 = r1 | r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -589,7 +591,7 @@
   // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
   OpRegReg(kOpXor, r0, r2);  // r0 = r0 ^ r2
   OpRegReg(kOpXor, r1, r3);  // r1 = r1 ^ r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -602,7 +604,7 @@
   OpRegReg(kOpNeg, r0, r0);  // r0 = -r0
   OpRegImm(kOpAdc, r1, 0);   // r1 = r1 + CF
   OpRegReg(kOpNeg, r1, r1);  // r1 = -r1
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index b281063..5c993c5 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -416,7 +416,7 @@
 
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     low_reg = AllocTempDouble();
-    high_reg = low_reg + 1;
+    high_reg = low_reg;  // only one allocated!
     res = (low_reg & 0xff) | ((high_reg & 0xff) << 8);
     return res;
   }
@@ -546,4 +546,254 @@
   return X86Mir2Lir::EncodingMap[opcode].fmt;
 }
 
+/*
+ * Return an updated location record with current in-register status.
+ * If the value lives in live temps, reflect that fact.  No code
+ * is generated.  If the live value is part of an older pair,
+ * clobber both low and high.
+ */
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::UpdateLocWide(RegLocation loc) {
+  DCHECK(loc.wide);
+  DCHECK(CheckCorePoolSanity());
+  if (loc.location != kLocPhysReg) {
+    DCHECK((loc.location == kLocDalvikFrame) ||
+         (loc.location == kLocCompilerTemp));
+    // Are the dalvik regs already live in physical registers?
+    RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
+
+    // Handle FP registers specially on x86.
+    if (info_lo && IsFpReg(info_lo->reg)) {
+      bool match = true;
+
+      // We can't match a FP register with a pair of Core registers.
+      match = match && (info_lo->pair == 0);
+
+      if (match) {
+        // We can reuse; update the register usage info.
+        loc.low_reg = info_lo->reg;
+        loc.high_reg = info_lo->reg;  // Play nice with existing code.
+        loc.location = kLocPhysReg;
+        loc.vec_len = kVectorLength8;
+        DCHECK(IsFpReg(loc.low_reg));
+        return loc;
+      }
+      // We can't easily reuse; clobber and free any overlaps.
+      if (info_lo) {
+        Clobber(info_lo->reg);
+        FreeTemp(info_lo->reg);
+        if (info_lo->pair)
+          Clobber(info_lo->partner);
+      }
+    } else {
+      RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg);
+      bool match = true;
+      match = match && (info_lo != NULL);
+      match = match && (info_hi != NULL);
+      // Are they both core or both FP?
+      match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg));
+      // If a pair of floating point singles, are they properly aligned?
+      if (match && IsFpReg(info_lo->reg)) {
+        match &= ((info_lo->reg & 0x1) == 0);
+        match &= ((info_hi->reg - info_lo->reg) == 1);
+      }
+      // If previously used as a pair, is it the same pair?
+      if (match && (info_lo->pair || info_hi->pair)) {
+        match = (info_lo->pair == info_hi->pair);
+        match &= ((info_lo->reg == info_hi->partner) &&
+              (info_hi->reg == info_lo->partner));
+      }
+      if (match) {
+        // Can reuse - update the register usage info
+        loc.low_reg = info_lo->reg;
+        loc.high_reg = info_hi->reg;
+        loc.location = kLocPhysReg;
+        MarkPair(loc.low_reg, loc.high_reg);
+        DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+        return loc;
+      }
+      // Can't easily reuse - clobber and free any overlaps
+      if (info_lo) {
+        Clobber(info_lo->reg);
+        FreeTemp(info_lo->reg);
+        if (info_lo->pair)
+          Clobber(info_lo->partner);
+      }
+      if (info_hi) {
+        Clobber(info_hi->reg);
+        FreeTemp(info_hi->reg);
+        if (info_hi->pair)
+          Clobber(info_hi->partner);
+      }
+    }
+  }
+  return loc;
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) {
+  DCHECK(loc.wide);
+  int32_t new_regs;
+  int32_t low_reg;
+  int32_t high_reg;
+
+  loc = UpdateLocWide(loc);
+
+  /* If it is already in a register, we can assume proper form.  Is it the right reg class? */
+  if (loc.location == kLocPhysReg) {
+    DCHECK_EQ(IsFpReg(loc.low_reg), loc.IsVectorScalar());
+    if (!RegClassMatches(reg_class, loc.low_reg)) {
+      /* It is the wrong register class.  Reallocate and copy. */
+      if (!IsFpReg(loc.low_reg)) {
+        // We want this in a FP reg, and it is in core registers.
+        DCHECK(reg_class != kCoreReg);
+        // Allocate this into any FP reg, and mark it with the right size.
+        low_reg = AllocTypedTemp(true, reg_class);
+        OpVectorRegCopyWide(low_reg, loc.low_reg, loc.high_reg);
+        CopyRegInfo(low_reg, loc.low_reg);
+        Clobber(loc.low_reg);
+        Clobber(loc.high_reg);
+        loc.low_reg = low_reg;
+        loc.high_reg = low_reg;  // Play nice with existing code.
+        loc.vec_len = kVectorLength8;
+      } else {
+        // The value is in a FP register, and we want it in a pair of core registers.
+        DCHECK_EQ(reg_class, kCoreReg);
+        DCHECK_EQ(loc.low_reg, loc.high_reg);
+        new_regs = AllocTypedTempPair(false, kCoreReg);  // Force to core registers.
+        low_reg = new_regs & 0xff;
+        high_reg = (new_regs >> 8) & 0xff;
+        DCHECK_NE(low_reg, high_reg);
+        OpRegCopyWide(low_reg, high_reg, loc.low_reg, loc.high_reg);
+        CopyRegInfo(low_reg, loc.low_reg);
+        CopyRegInfo(high_reg, loc.high_reg);
+        Clobber(loc.low_reg);
+        Clobber(loc.high_reg);
+        loc.low_reg = low_reg;
+        loc.high_reg = high_reg;
+        MarkPair(loc.low_reg, loc.high_reg);
+        DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+      }
+    }
+    return loc;
+  }
+
+  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
+    // Need a wide vector register.
+    low_reg = AllocTypedTemp(true, reg_class);
+    loc.low_reg = low_reg;
+    loc.high_reg = low_reg;  // Play nice with existing code.
+    loc.vec_len = kVectorLength8;
+    if (update) {
+      loc.location = kLocPhysReg;
+      MarkLive(loc.low_reg, loc.s_reg_low);
+    }
+    DCHECK(IsFpReg(loc.low_reg));
+  } else {
+    DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+    new_regs = AllocTypedTempPair(loc.fp, reg_class);
+    loc.low_reg = new_regs & 0xff;
+    loc.high_reg = (new_regs >> 8) & 0xff;
+
+    MarkPair(loc.low_reg, loc.high_reg);
+    if (update) {
+      loc.location = kLocPhysReg;
+      MarkLive(loc.low_reg, loc.s_reg_low);
+      MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+    }
+    DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+  }
+  return loc;
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
+  int new_reg;
+
+  if (loc.wide)
+    return EvalLocWide(loc, reg_class, update);
+
+  loc = UpdateLoc(loc);
+
+  if (loc.location == kLocPhysReg) {
+    if (!RegClassMatches(reg_class, loc.low_reg)) {
+      /* Wrong register class.  Realloc, copy and transfer ownership. */
+      new_reg = AllocTypedTemp(loc.fp, reg_class);
+      OpRegCopy(new_reg, loc.low_reg);
+      CopyRegInfo(new_reg, loc.low_reg);
+      Clobber(loc.low_reg);
+      loc.low_reg = new_reg;
+      if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+        loc.vec_len = kVectorLength4;
+    }
+    return loc;
+  }
+
+  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+
+  new_reg = AllocTypedTemp(loc.fp, reg_class);
+  loc.low_reg = new_reg;
+  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+    loc.vec_len = kVectorLength4;
+
+  if (update) {
+    loc.location = kLocPhysReg;
+    MarkLive(loc.low_reg, loc.s_reg_low);
+  }
+  return loc;
+}
+
+int X86Mir2Lir::AllocTempDouble() {
+  // We really don't need a pair of registers.
+  return AllocTempFloat();
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+void X86Mir2Lir::ResetDefLocWide(RegLocation rl) {
+  DCHECK(rl.wide);
+  RegisterInfo* p_low = IsTemp(rl.low_reg);
+  if (IsFpReg(rl.low_reg)) {
+    // We are using only the low register.
+    if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+      NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+    }
+    ResetDef(rl.low_reg);
+  } else {
+    RegisterInfo* p_high = IsTemp(rl.high_reg);
+    if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+      DCHECK(p_low->pair);
+      NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+    }
+    if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+      DCHECK(p_high->pair);
+    }
+    ResetDef(rl.low_reg);
+    ResetDef(rl.high_reg);
+  }
+}
+
+void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
+  // Can we do this directly to memory?
+  rl_dest = UpdateLocWide(rl_dest);
+  if ((rl_dest.location == kLocDalvikFrame) ||
+      (rl_dest.location == kLocCompilerTemp)) {
+    int32_t val_lo = Low32Bits(value);
+    int32_t val_hi = High32Bits(value);
+    int rBase = TargetReg(kSp);
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+
+    LIR * store = NewLIR3(kX86Mov32MI, rBase, displacement + LOWORD_OFFSET, val_lo);
+    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    store = NewLIR3(kX86Mov32MI, rBase, displacement + HIWORD_OFFSET, val_hi);
+    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    return;
+  }
+
+  // Just use the standard code to do the generation.
+  Mir2Lir::GenConstWide(rl_dest, value);
+}
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index f683aff..91c39fa 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -334,6 +334,7 @@
     LIR *res;
     if (X86_FPREG(r_dest_lo)) {
       DCHECK(X86_FPREG(r_dest_hi));  // ignore r_dest_hi
+      DCHECK_EQ(r_dest_lo, r_dest_hi);
       if (value == 0) {
         return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
       } else {
@@ -343,9 +344,11 @@
           res = LoadConstantNoClobber(r_dest_lo, val_lo);
         }
         if (val_hi != 0) {
+          r_dest_hi = AllocTempDouble();
           LoadConstantNoClobber(r_dest_hi, val_hi);
           NewLIR2(kX86PsllqRI, r_dest_hi, 32);
           NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+          FreeTemp(r_dest_hi);
         }
       }
     } else {
@@ -370,12 +373,6 @@
       is64bit = true;
       if (X86_FPREG(r_dest)) {
         opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
-        if (X86_SINGLEREG(r_dest)) {
-          DCHECK(X86_FPREG(r_dest_hi));
-          DCHECK_EQ(r_dest, (r_dest_hi - 1));
-          r_dest = S2d(r_dest, r_dest_hi);
-        }
-        r_dest_hi = r_dest + 1;
       } else {
         pair = true;
         opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
@@ -488,12 +485,6 @@
       is64bit = true;
       if (X86_FPREG(r_src)) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
-        if (X86_SINGLEREG(r_src)) {
-          DCHECK(X86_FPREG(r_src_hi));
-          DCHECK_EQ(r_src, (r_src_hi - 1));
-          r_src = S2d(r_src, r_src_hi);
-        }
-        r_src_hi = r_src + 1;
       } else {
         pair = true;
         opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
@@ -573,4 +564,17 @@
                               r_src_lo, r_src_hi, kLong, INVALID_SREG);
 }
 
+/*
+ * Copy a long value held in a pair of core registers (low_reg, high_reg)
+ * into the single XMM register fp_reg.
+ */
+void X86Mir2Lir::OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg) {
+  NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
+  int tmp_reg = AllocTempDouble();
+  NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
+  NewLIR2(kX86PsllqRI, tmp_reg, 32);
+  NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);
+  FreeTemp(tmp_reg);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f38a16d..1488f5d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -128,11 +128,11 @@
 #define X86_FP_REG_MASK 0xF
 
 // RegisterLocation templates return values (rAX, rAX/rDX or XMM0).
-//                               location,     wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg,     s_reg_low
-#define X86_LOC_C_RETURN             {kLocPhysReg, 0,    0,       0,     0,  0,    0,   0,        1,    rAX,    INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_WIDE        {kLocPhysReg, 1,    0,       0,     0,  0,    0,   0,        1,    rAX,    rDX,         INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_FLOAT       {kLocPhysReg, 0,    0,       0,     1,  0,    0,   0,        1,    fr0,    INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_DOUBLE      {kLocPhysReg, 1,    0,       0,     1,  0,    0,   0,        1,    fr0,    fr1,         INVALID_SREG, INVALID_SREG}
+//                               location,     wide, defined, const, fp, core, ref, high_word, home, vec_len, low_reg, high_reg,     s_reg_low, orig_sreg
+#define X86_LOC_C_RETURN             {kLocPhysReg, 0,    0,       0,     0,  0,    0,   0,        1,    kVectorNotUsed, rAX,    INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_WIDE        {kLocPhysReg, 1,    0,       0,     0,  0,    0,   0,        1,    kVectorNotUsed, rAX,    rDX,         INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_FLOAT       {kLocPhysReg, 0,    0,       0,     1,  0,    0,   0,        1,    kVectorLength4, fr0,    INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_DOUBLE      {kLocPhysReg, 1,    0,       0,     1,  0,    0,   0,        1,    kVectorLength8, fr0,    fr0,         INVALID_SREG, INVALID_SREG}
 
 enum X86ResourceEncodingPos {
   kX86GPReg0   = 0,
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index bef966c..f211e3f 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -405,7 +405,7 @@
 }
 
 static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
-                                     INVALID_REG, INVALID_REG, INVALID_SREG,
+                                     kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG,
                                      INVALID_SREG};
 
 void MIRGraph::InitRegLocations() {