[optimizing] Use more X86_64 addressing modes

Allow constant and memory operands for more X86_64 instructions.

Add the matching memory-operand forms of those instructions to the X86_64 assembler.

Fix a bug in cmpq(CpuRegister, const Address&).

Allow mov <addr>,immediate (instruction 0xC7) to be a valid faulting
instruction.

Change-Id: I5b8a409444426633920cd08e09f687a7afc88a39
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d8d2ae3..b404f8d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1023,14 +1023,14 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -1052,24 +1052,46 @@
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(value));
-        if (value == 0) {
-          __ testq(left_reg, left_reg);
+        if (IsInt<32>(value)) {
+          if (value == 0) {
+            __ testq(left_reg, left_reg);
+          } else {
+            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+          }
         } else {
-          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+          // Value won't fit in an int.
+          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
         }
+      } else if (right.IsDoubleStackSlot()) {
+        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
       }
       break;
     }
     case Primitive::kPrimFloat: {
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+      if (right.IsConstant()) {
+        float value = right.GetConstant()->AsFloatConstant()->GetValue();
+        __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
+      } else if (right.IsStackSlot()) {
+        __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
+      }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
       break;
     }
     case Primitive::kPrimDouble: {
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+      if (right.IsConstant()) {
+        double value = right.GetConstant()->AsDoubleConstant()->GetValue();
+        __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
+      } else if (right.IsDoubleStackSlot()) {
+        __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
+      }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
       break;
     }
@@ -1178,8 +1200,7 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0,
-          Location::FpuRegisterLocation(XMM0));
+      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
       break;
 
     default:
@@ -1419,7 +1440,6 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetOut(Location::SameAsFirstInput());
-      locations->AddTemp(Location::RequiresRegister());
       locations->AddTemp(Location::RequiresFpuRegister());
       break;
 
@@ -1447,26 +1467,22 @@
 
     case Primitive::kPrimFloat: {
       DCHECK(in.Equals(out));
-      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
-      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
       // Implement float negation with an exclusive or with value
       // 0x80000000 (mask for bit 31, representing the sign of a
       // single-precision floating-point number).
-      __ movq(constant, Immediate(INT64_C(0x80000000)));
-      __ movd(mask, constant);
+      __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
       break;
     }
 
     case Primitive::kPrimDouble: {
       DCHECK(in.Equals(out));
-      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
-      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
       // Implement double negation with an exclusive or with value
       // 0x8000000000000000 (mask for bit 63, representing the sign of
       // a double-precision floating-point number).
-      __ movq(constant, Immediate(INT64_C(0x8000000000000000)));
-      __ movd(mask, constant);
+      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
       break;
     }
@@ -1613,19 +1629,19 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
@@ -1644,19 +1660,19 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
@@ -1910,17 +1926,56 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-float' instruction.
-          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          if (in.IsRegister()) {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          } else if (in.IsConstant()) {
+            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
+          }
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-float' instruction.
-          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          if (in.IsRegister()) {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          } else if (in.IsConstant()) {
+            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
+          }
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          if (in.IsFpuRegister()) {
+            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          } else if (in.IsConstant()) {
+            double v = in.GetConstant()->AsDoubleConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (bit_cast<int64_t, double>(v) == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()));
+          }
           break;
 
         default:
@@ -1938,17 +1993,56 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-double' instruction.
-          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          if (in.IsRegister()) {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          } else if (in.IsConstant()) {
+            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
+          }
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-double' instruction.
-          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          if (in.IsRegister()) {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          } else if (in.IsConstant()) {
+            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
+          }
           break;
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          if (in.IsFpuRegister()) {
+            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          } else if (in.IsConstant()) {
+            float v = in.GetConstant()->AsFloatConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (bit_cast<int32_t, float>(v) == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()));
+          }
           break;
 
         default:
@@ -3128,7 +3222,7 @@
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
     locations->SetInAt(1, Location::RequiresFpuRegister());
   } else {
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
@@ -3155,24 +3249,46 @@
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movb(Address(base, offset), Immediate(v));
+      } else {
+        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));  // 32-bit store, like the register path.
+      } else {
+        __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimLong: {
-      __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(Address(base, offset), Immediate(v_32));
+      } else {
+        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -3291,8 +3407,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
@@ -3431,7 +3546,7 @@
         1, Location::RegisterOrConstant(instruction->InputAt(1)));
     locations->SetInAt(2, Location::RequiresRegister());
     if (value_type == Primitive::kPrimLong) {
-      locations->SetInAt(2, Location::RequiresRegister());
+      locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
     } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
       locations->SetInAt(2, Location::RequiresFpuRegister());
     } else {
@@ -3519,8 +3634,8 @@
             __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, offset),
-                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            __ movl(Address(obj, offset), Immediate(v));
           }
         } else {
           DCHECK(index.IsRegister()) << index;
@@ -3529,8 +3644,9 @@
                     value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
             __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+                    Immediate(v));
           }
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -3554,12 +3670,25 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        DCHECK(value.IsRegister());
-        __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+        if (value.IsRegister()) {
+          __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+        } else {
+          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+          DCHECK(IsInt<32>(v));
+          int32_t v_32 = v;
+          __ movq(Address(obj, offset), Immediate(v_32));
+        }
       } else {
-        DCHECK(value.IsRegister());
-        __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                value.AsRegister<CpuRegister>());
+        if (value.IsRegister()) {
+          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                  value.AsRegister<CpuRegister>());
+        } else {
+          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+          DCHECK(IsInt<32>(v));
+          int32_t v_32 = v;
+          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                  Immediate(v_32));
+        }
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4145,13 +4274,7 @@
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
          || instruction->GetResultType() == Primitive::kPrimLong);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->GetType() == Primitive::kPrimInt) {
-    locations->SetInAt(1, Location::Any());
-  } else {
-    // We can handle 32 bit constants.
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
-  }
+  locations->SetInAt(1, Location::Any());
   locations->SetOut(Location::SameAsFirstInput());
 }
 
@@ -4212,25 +4335,43 @@
     if (second.IsConstant()) {
       second_is_constant = true;
       value = second.GetConstant()->AsLongConstant()->GetValue();
-      DCHECK(IsInt<32>(value));
     }
+    bool is_int32_value = IsInt<32>(value);
 
     if (instruction->IsAnd()) {
       if (second_is_constant) {
-        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ andq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ andq(first_reg, second.AsRegister<CpuRegister>());
       }
     } else if (instruction->IsOr()) {
       if (second_is_constant) {
-        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ orq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ orq(first_reg, second.AsRegister<CpuRegister>());
       }
     } else {
       DCHECK(instruction->IsXor());
       if (second_is_constant) {
-        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ xorq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ xorq(first_reg, second.AsRegister<CpuRegister>());
       }
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index d9a1c31..c369020 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -704,7 +704,6 @@
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetOut(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
-    locations->AddTemp(Location::RequiresFpuRegister());
     return;
   }
 
@@ -732,14 +731,12 @@
   // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   Label done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
-  __ movd(inPlusPointFive, out, false);
+  // Load 0.5 into inPlusPointFive.
+  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
 
   // Add in the input.
   __ addss(inPlusPointFive, in);
@@ -747,12 +744,8 @@
   // And truncate to an integer.
   __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
 
-  __ movl(out, Immediate(kPrimIntMax));
-  // maxInt = int-to-float(out)
-  __ cvtsi2ss(maxInt, out);
-
   // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, maxInt);
+  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
   __ j(kAboveEqual, &done);
 
   // if input == NaN goto nan
@@ -782,14 +775,12 @@
   // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   Label done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
-  __ movd(inPlusPointFive, out, true);
+  // Load 0.5 into inPlusPointFive.
+  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
 
   // Add in the input.
   __ addsd(inPlusPointFive, in);
@@ -797,12 +788,8 @@
   // And truncate to an integer.
   __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
 
-  __ movq(out, Immediate(kPrimLongMax));
-  // maxLong = long-to-double(out)
-  __ cvtsi2sd(maxLong, out, true);
-
   // if inPlusPointFive >= maxLong goto done
-  __ comisd(inPlusPointFive, maxLong);
+  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
   __ j(kAboveEqual, &done);
 
   // if input == NaN goto nan
@@ -960,26 +947,48 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
 }
 
 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+  Location value = locations->InAt(1);
   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   // to avoid a SIGBUS.
   switch (size) {
     case Primitive::kPrimByte:
-      __ movb(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movb(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimShort:
-      __ movw(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movw(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimInt:
-      __ movl(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movl(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimLong:
-      __ movq(Address(address, 0), value);
+      if (value.IsConstant()) {
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(Address(address, 0), Immediate(v_32));
+      } else {
+        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     default:
       LOG(FATAL) << "Type not recognized for poke: " << size;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32204a9..1ff99df 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -128,6 +128,16 @@
 }
 
 
+void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {  // Store imm at dst.
+  CHECK(imm.is_int32());  // Only immediates that fit in 32 bits are accepted.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst);
+  EmitUint8(0xC7);  // Opcode byte of the mov <addr>, immediate form.
+  EmitOperand(0, dst);  // 0 is passed where register forms pass reg.LowBits().
+  EmitImmediate(imm);
+}
+
+
 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
@@ -652,6 +662,21 @@
 }
 
 
+void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // cvtsi2ss with a memory source operand.
+  EmitUint8(0xF3);  // Emitted ahead of the REX byte.
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2A.
+  EmitUint8(0x2A);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
   cvtsi2sd(dst, src, false);
 }
@@ -672,6 +697,21 @@
 }
 
 
+void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // cvtsi2sd with a memory source operand.
+  EmitUint8(0xF2);  // Emitted ahead of the REX byte.
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2A.
+  EmitUint8(0x2A);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -692,6 +732,16 @@
 }
 
 
+void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {  // Memory-source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Emitted ahead of the optional REX byte.
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x5A.
+  EmitUint8(0x5A);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF2);
@@ -752,6 +802,16 @@
 }
 
 
+void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {  // Memory-source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);  // Emitted ahead of the optional REX byte.
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x5A.
+  EmitUint8(0x5A);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -771,6 +831,15 @@
 }
 
 
+void X86_64Assembler::comiss(XmmRegister a, const Address& b) {  // comiss a, <memory operand b>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2F.
+  EmitUint8(0x2F);
+  EmitOperand(a.LowBits(), b);
+}
+
+
 void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -780,6 +849,17 @@
   EmitXmmRegisterOperand(a.LowBits(), b);
 }
 
+
+void X86_64Assembler::comisd(XmmRegister a, const Address& b) {  // comisd a, <memory operand b>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Emitted ahead of the optional REX byte, as in the register form.
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2F.
+  EmitUint8(0x2F);
+  EmitOperand(a.LowBits(), b);
+}
+
+
 void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(a, b);
@@ -789,6 +869,15 @@
 }
 
 
+void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {  // ucomiss a, <memory operand b>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2E.
+  EmitUint8(0x2E);
+  EmitOperand(a.LowBits(), b);
+}
+
+
 void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -799,6 +888,16 @@
 }
 
 
+void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {  // ucomisd a, <memory operand b>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Emitted ahead of the optional REX byte, as in the register form.
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);  // Opcode bytes: 0x0F 0x2E.
+  EmitUint8(0x2E);
+  EmitOperand(a.LowBits(), b);
+}
+
+
 void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1161,7 +1260,7 @@
 
 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(reg);
+  EmitRex64(reg, address);  // The REX byte must cover the address operand as well as reg.
   EmitUint8(0x3B);
   EmitOperand(reg.LowBits(), address);
 }
@@ -1288,6 +1387,14 @@
 }
 
 
+void X86_64Assembler::andq(CpuRegister dst, const Address& src) {  // andq dst, <memory operand src>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // REX byte built from both the register and the memory operand.
+  EmitUint8(0x23);  // Opcode byte.
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1327,6 +1434,14 @@
 }
 
 
+void X86_64Assembler::orq(CpuRegister dst, const Address& src) {  // orq dst, <memory operand src>.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // REX byte built from both the register and the memory operand.
+  EmitUint8(0x0B);  // Opcode byte.
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1365,6 +1480,14 @@
   EmitComplex(6, Operand(dst), imm);
 }
 
+void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // REX.W gives the 64-bit operand size; covers dst and the address.
+  EmitUint8(0x33);  // REX.W + 33 /r: XOR r64, r/m64.
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 #if 0
 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
   // REX.WRXB
@@ -2182,9 +2305,15 @@
   if (dst.NeedsRex()) {
     rex |= 0x44;  // REX.0R00
   }
-  if (rex != 0) {
-    EmitUint8(rex);
+  EmitUint8(rex);
+}
+
+void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
+  uint8_t rex = 0x48 | operand.rex();  // REX.W000 merged with the operand's own REX bits.
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00
   }
+  EmitUint8(rex);  // Always emitted: rex has at least 0x48 set, so it is never zero.
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 16ef70b..79ad8f5 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -328,6 +328,7 @@
   void movq(CpuRegister dst, const Address& src);
   void movl(CpuRegister dst, const Address& src);
   void movq(const Address& dst, CpuRegister src);
+  void movq(const Address& dst, const Immediate& src);  // Note: x86-64 encodes only a sign-extended imm32.
   void movl(const Address& dst, CpuRegister src);
   void movl(const Address& dst, const Immediate& imm);
 
@@ -391,14 +392,18 @@
 
   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
+  void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);  // is64bit: 64-bit source.
   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
+  void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);  // is64bit: 64-bit source.
 
   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtss2sd(XmmRegister dst, XmmRegister src);
+  void cvtss2sd(XmmRegister dst, const Address& src);
 
   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtsd2ss(XmmRegister dst, XmmRegister src);
+  void cvtsd2ss(XmmRegister dst, const Address& src);
 
   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
@@ -408,9 +413,13 @@
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
+  void comiss(XmmRegister a, const Address& b);
   void comisd(XmmRegister a, XmmRegister b);
+  void comisd(XmmRegister a, const Address& b);
   void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, const Address& b);
   void ucomisd(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, const Address& b);
 
   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
@@ -487,18 +496,21 @@
   void andl(CpuRegister reg, const Address& address);
   void andq(CpuRegister dst, const Immediate& imm);
   void andq(CpuRegister dst, CpuRegister src);
+  void andq(CpuRegister reg, const Address& address);
 
   void orl(CpuRegister dst, const Immediate& imm);
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
   void orq(CpuRegister dst, const Immediate& imm);
+  void orq(CpuRegister reg, const Address& address);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
   void xorl(CpuRegister reg, const Address& address);
   void xorq(CpuRegister dst, const Immediate& imm);
   void xorq(CpuRegister dst, CpuRegister src);
+  void xorq(CpuRegister reg, const Address& address);
 
   void addl(CpuRegister dst, CpuRegister src);
   void addl(CpuRegister reg, const Immediate& imm);
@@ -789,6 +801,7 @@
   void EmitRex64(const Operand& operand);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
+  void EmitRex64(XmmRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, XmmRegister src);
 
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 5ca0373..454cb02 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -667,6 +667,107 @@
   DriverStr(expected, "movw");
 }
 
+TEST_F(AssemblerX86_64Test, MovqAddrImm) {
+  // Store a negative immediate to [RAX] to check the memory-destination form.
+  const x86_64::Address dest(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->movq(dest, x86_64::Immediate(-5));
+  DriverStr("movq $-5, 0(%RAX)\n", "movq");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2ssAddr) {
+  // The last argument picks the source width: false -> 32-bit, true -> 64-bit.
+  const x86_64::XmmRegister dst(x86_64::XMM0);
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->cvtsi2ss(dst, src, false);
+  GetAssembler()->cvtsi2ss(dst, src, true);
+  const char* const expected_asm =
+      "cvtsi2ss 0(%RAX), %xmm0\n"
+      "cvtsi2ssq 0(%RAX), %xmm0\n";
+  DriverStr(expected_asm, "cvtsi2ss");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2sdAddr) {
+  // The last argument picks the source width: false -> 32-bit, true -> 64-bit.
+  const x86_64::XmmRegister dst(x86_64::XMM0);
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->cvtsi2sd(dst, src, false);
+  GetAssembler()->cvtsi2sd(dst, src, true);
+  const char* const expected_asm =
+      "cvtsi2sd 0(%RAX), %xmm0\n"
+      "cvtsi2sdq 0(%RAX), %xmm0\n";
+  DriverStr(expected_asm, "cvtsi2sd");
+}
+
+TEST_F(AssemblerX86_64Test, CmpqAddr) {
+  // R12 and R9 are both extended registers, so the REX prefix must cover each operand.
+  const x86_64::Address mem(x86_64::CpuRegister(x86_64::R9), 0);
+  GetAssembler()->cmpq(x86_64::CpuRegister(x86_64::R12), mem);
+  DriverStr("cmpq 0(%R9), %R12\n", "cmpq");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtss2sdAddr) {
+  // Memory-source form: convert the value at [RAX] into XMM0.
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->cvtss2sd(x86_64::XmmRegister(x86_64::XMM0), src);
+  DriverStr("cvtss2sd 0(%RAX), %xmm0\n", "cvtss2sd");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsd2ssAddr) {
+  // Memory-source form: convert the value at [RAX] into XMM0.
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->cvtsd2ss(x86_64::XmmRegister(x86_64::XMM0), src);
+  DriverStr("cvtsd2ss 0(%RAX), %xmm0\n", "cvtsd2ss");
+}
+
+TEST_F(AssemblerX86_64Test, ComissAddr) {
+  // XMM14 is an extended register, so the encoding needs a REX prefix.
+  const x86_64::Address mem(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->comiss(x86_64::XmmRegister(x86_64::XMM14), mem);
+  DriverStr("comiss 0(%RAX), %xmm14\n", "comiss");
+}
+
+TEST_F(AssemblerX86_64Test, ComisdAddr) {
+  // R9 as the base register means the address operand needs a REX prefix.
+  const x86_64::Address mem(x86_64::CpuRegister(x86_64::R9), 0);
+  GetAssembler()->comisd(x86_64::XmmRegister(x86_64::XMM0), mem);
+  DriverStr("comisd 0(%R9), %xmm0\n", "comisd");
+}
+
+TEST_F(AssemblerX86_64Test, UComissAddr) {
+  // Memory-operand form: compare XMM0 against the value at [RAX].
+  const x86_64::Address mem(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), mem);
+  DriverStr("ucomiss 0(%RAX), %xmm0\n", "ucomiss");
+}
+
+TEST_F(AssemblerX86_64Test, UComisdAddr) {
+  // Memory-operand form: compare XMM0 against the value at [RAX].
+  const x86_64::Address mem(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM0), mem);
+  DriverStr("ucomisd 0(%RAX), %xmm0\n", "ucomisd");
+}
+
+TEST_F(AssemblerX86_64Test, Andq) {
+  // Destination R9 is an extended register; the source is memory at [RAX].
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->andq(x86_64::CpuRegister(x86_64::R9), src);
+  DriverStr("andq 0(%RAX), %r9\n", "andq");
+}
+
+TEST_F(AssemblerX86_64Test, Orq) {
+  // Destination R9 is an extended register; the source is memory at [RAX].
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->orq(x86_64::CpuRegister(x86_64::R9), src);
+  DriverStr("orq 0(%RAX), %r9\n", "orq");
+}
+
+TEST_F(AssemblerX86_64Test, Xorq) {
+  // Destination R9 is an extended register; the source is memory at [RAX].
+  const x86_64::Address src(x86_64::CpuRegister(x86_64::RAX), 0);
+  GetAssembler()->xorq(x86_64::CpuRegister(x86_64::R9), src);
+  DriverStr("xorq 0(%RAX), %r9\n", "xorq");
+}
+
 TEST_F(AssemblerX86_64Test, Movsxd) {
   DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd");
 }