Merge "Simplify template parameters of Elf classes."
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d8d2ae3..b404f8d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1023,14 +1023,14 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -1052,24 +1052,46 @@
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(value));
-        if (value == 0) {
-          __ testq(left_reg, left_reg);
+        if (IsInt<32>(value)) {
+          if (value == 0) {
+            __ testq(left_reg, left_reg);
+          } else {
+            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+          }
         } else {
-          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+          // Value won't fit in an int.
+          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
         }
+      } else if (right.IsDoubleStackSlot()) {
+        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
       }
       break;
     }
     case Primitive::kPrimFloat: {
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+      if (right.IsConstant()) {
+        float value = right.GetConstant()->AsFloatConstant()->GetValue();
+        __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
+      } else if (right.IsStackSlot()) {
+        __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
+      }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
       break;
     }
     case Primitive::kPrimDouble: {
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+      if (right.IsConstant()) {
+        double value = right.GetConstant()->AsDoubleConstant()->GetValue();
+        __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
+      } else if (right.IsDoubleStackSlot()) {
+        __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
+      }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
       break;
     }
@@ -1178,8 +1200,7 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0,
-          Location::FpuRegisterLocation(XMM0));
+      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
       break;
 
     default:
@@ -1419,7 +1440,6 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetOut(Location::SameAsFirstInput());
-      locations->AddTemp(Location::RequiresRegister());
       locations->AddTemp(Location::RequiresFpuRegister());
       break;
 
@@ -1447,26 +1467,22 @@
 
     case Primitive::kPrimFloat: {
       DCHECK(in.Equals(out));
-      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
-      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
       // Implement float negation with an exclusive or with value
       // 0x80000000 (mask for bit 31, representing the sign of a
       // single-precision floating-point number).
-      __ movq(constant, Immediate(INT64_C(0x80000000)));
-      __ movd(mask, constant);
+      __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
       break;
     }
 
     case Primitive::kPrimDouble: {
       DCHECK(in.Equals(out));
-      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
-      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
       // Implement double negation with an exclusive or with value
       // 0x8000000000000000 (mask for bit 63, representing the sign of
       // a double-precision floating-point number).
-      __ movq(constant, Immediate(INT64_C(0x8000000000000000)));
-      __ movd(mask, constant);
+      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
       break;
     }
@@ -1613,19 +1629,19 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
@@ -1644,19 +1660,19 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
@@ -1910,17 +1926,56 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-float' instruction.
-          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          if (in.IsRegister()) {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          } else if (in.IsConstant()) {
+            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
+          }
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-float' instruction.
-          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          if (in.IsRegister()) {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          } else if (in.IsConstant()) {
+            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
+          }
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          if (in.IsFpuRegister()) {
+            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          } else if (in.IsConstant()) {
+            double v = in.GetConstant()->AsDoubleConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (bit_cast<int64_t, double>(v) == 0) {
+              __ xorps(dest, dest);
+            } else {
+              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+            }
+          } else {
+            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()));
+          }
           break;
 
         default:
@@ -1938,17 +1993,56 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-double' instruction.
-          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          if (in.IsRegister()) {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+          } else if (in.IsConstant()) {
+            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
+          }
           break;
 
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-double' instruction.
-          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          if (in.IsRegister()) {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          } else if (in.IsConstant()) {
+            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (v == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
+          }
           break;
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          if (in.IsFpuRegister()) {
+            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+          } else if (in.IsConstant()) {
+            float v = in.GetConstant()->AsFloatConstant()->GetValue();
+            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+            if (bit_cast<int32_t, float>(v) == 0) {
+              __ xorpd(dest, dest);
+            } else {
+              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+            }
+          } else {
+            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
+                        Address(CpuRegister(RSP), in.GetStackIndex()));
+          }
           break;
 
         default:
@@ -3128,7 +3222,7 @@
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
     locations->SetInAt(1, Location::RequiresFpuRegister());
   } else {
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
@@ -3155,24 +3249,46 @@
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movb(Address(base, offset), Immediate(v));
+      } else {
+        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));  // Full 32-bit store; movw writes only 16.
+      } else {
+        __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimLong: {
-      __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+      if (value.IsConstant()) {
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(Address(base, offset), Immediate(v_32));
+      } else {
+        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -3291,8 +3407,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
@@ -3431,7 +3546,7 @@
         1, Location::RegisterOrConstant(instruction->InputAt(1)));
     locations->SetInAt(2, Location::RequiresRegister());
     if (value_type == Primitive::kPrimLong) {
-      locations->SetInAt(2, Location::RequiresRegister());
+      locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
     } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
       locations->SetInAt(2, Location::RequiresFpuRegister());
     } else {
@@ -3519,8 +3634,8 @@
             __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, offset),
-                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            __ movl(Address(obj, offset), Immediate(v));
           }
         } else {
           DCHECK(index.IsRegister()) << index;
@@ -3529,8 +3644,9 @@
                     value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
             __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+                    Immediate(v));
           }
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -3554,12 +3670,25 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        DCHECK(value.IsRegister());
-        __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+        if (value.IsRegister()) {
+          __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+        } else {
+          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+          DCHECK(IsInt<32>(v));
+          int32_t v_32 = v;
+          __ movq(Address(obj, offset), Immediate(v_32));
+        }
       } else {
-        DCHECK(value.IsRegister());
-        __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                value.AsRegister<CpuRegister>());
+        if (value.IsRegister()) {
+          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                  value.AsRegister<CpuRegister>());
+        } else {
+          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+          DCHECK(IsInt<32>(v));
+          int32_t v_32 = v;
+          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                  Immediate(v_32));
+        }
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4145,13 +4274,7 @@
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
          || instruction->GetResultType() == Primitive::kPrimLong);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->GetType() == Primitive::kPrimInt) {
-    locations->SetInAt(1, Location::Any());
-  } else {
-    // We can handle 32 bit constants.
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
-  }
+  locations->SetInAt(1, Location::Any());
   locations->SetOut(Location::SameAsFirstInput());
 }
 
@@ -4212,25 +4335,43 @@
     if (second.IsConstant()) {
       second_is_constant = true;
       value = second.GetConstant()->AsLongConstant()->GetValue();
-      DCHECK(IsInt<32>(value));
     }
+    bool is_int32_value = IsInt<32>(value);
 
     if (instruction->IsAnd()) {
       if (second_is_constant) {
-        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ andq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ andq(first_reg, second.AsRegister<CpuRegister>());
       }
     } else if (instruction->IsOr()) {
       if (second_is_constant) {
-        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ orq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ orq(first_reg, second.AsRegister<CpuRegister>());
       }
     } else {
       DCHECK(instruction->IsXor());
       if (second_is_constant) {
-        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+        if (is_int32_value) {
+          __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+        } else {
+          __ xorq(first_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsDoubleStackSlot()) {
+        __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
       } else {
         __ xorq(first_reg, second.AsRegister<CpuRegister>());
       }
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index e420a62..02ad675 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -62,7 +62,7 @@
 
   check_after_cf(graph);
 
-  HDeadCodeElimination(graph, nullptr).Run();
+  HDeadCodeElimination(graph).Run();
   SSAChecker ssa_checker_dce(&allocator, graph);
   ssa_checker_dce.Run();
   ASSERT_TRUE(ssa_checker_dce.IsValid());
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 3f309c5..cee9364 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -29,8 +29,10 @@
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, true, kDeadCodeEliminationPassName, stats) {}
+  HDeadCodeElimination(HGraph* graph,
+                       OptimizingCompilerStats* stats = nullptr,
+                       const char* name = kDeadCodeEliminationPassName)
+      : HOptimization(graph, true, name, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 6350019..98ae1ec 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -44,7 +44,7 @@
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HDeadCodeElimination(graph, nullptr).Run();
+  HDeadCodeElimination(graph).Run();
   SSAChecker ssa_checker(&allocator, graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index e743d8e..8950635 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -88,23 +88,36 @@
 
   // Visit this block's list of phis.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HInstruction* current = it.Current();
     // Ensure this block's list of phis contains only phis.
-    if (!it.Current()->IsPhi()) {
+    if (!current->IsPhi()) {
       AddError(StringPrintf("Block %d has a non-phi in its phi list.",
                             current_block_->GetBlockId()));
     }
-    it.Current()->Accept(this);
+    if (current->GetNext() == nullptr && current != block->GetLastPhi()) {
+      AddError(StringPrintf("The recorded last phi of block %d does not match "
+                            "the actual last phi %d.",
+                            current_block_->GetBlockId(),
+                            current->GetId()));
+    }
+    current->Accept(this);
   }
 
   // Visit this block's list of instructions.
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done();
-       it.Advance()) {
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* current = it.Current();
     // Ensure this block's list of instructions does not contains phis.
-    if (it.Current()->IsPhi()) {
+    if (current->IsPhi()) {
       AddError(StringPrintf("Block %d has a phi in its non-phi list.",
                             current_block_->GetBlockId()));
     }
-    it.Current()->Accept(this);
+    if (current->GetNext() == nullptr && current != block->GetLastInstruction()) {
+      AddError(StringPrintf("The recorded last instruction of block %d does not match "
+                            "the actual last instruction %d.",
+                            current_block_->GetBlockId(),
+                            current->GetId()));
+    }
+    current->Accept(this);
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index f30c9a6..98c0eed 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -89,10 +89,6 @@
       // current index, so don't advance the iterator.
       continue;
     }
-    if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) {
-      LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_
-          << ") occurred at the current position.";
-    }
     simplifications_at_current_position_ = 0;
     it.Advance();
   }
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c718ece..5d24d1f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -704,7 +704,6 @@
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetOut(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
-    locations->AddTemp(Location::RequiresFpuRegister());
     return;
   }
 
@@ -732,14 +731,12 @@
   // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   Label done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
-  __ movd(inPlusPointFive, out, false);
+  // Load 0.5 into inPlusPointFive.
+  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
 
   // Add in the input.
   __ addss(inPlusPointFive, in);
@@ -747,12 +744,8 @@
   // And truncate to an integer.
   __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
 
-  __ movl(out, Immediate(kPrimIntMax));
-  // maxInt = int-to-float(out)
-  __ cvtsi2ss(maxInt, out);
-
   // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, maxInt);
+  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
   __ j(kAboveEqual, &done);
 
   // if input == NaN goto nan
@@ -782,14 +775,12 @@
   // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   Label done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
-  __ movd(inPlusPointFive, out, true);
+  // Load 0.5 into inPlusPointFive.
+  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
 
   // Add in the input.
   __ addsd(inPlusPointFive, in);
@@ -797,12 +788,8 @@
   // And truncate to an integer.
   __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
 
-  __ movq(out, Immediate(kPrimLongMax));
-  // maxLong = long-to-double(out)
-  __ cvtsi2sd(maxLong, out, true);
-
   // if inPlusPointFive >= maxLong goto done
-  __ comisd(inPlusPointFive, maxLong);
+  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
   __ j(kAboveEqual, &done);
 
   // if input == NaN goto nan
@@ -960,26 +947,48 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
 }
 
 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+  Location value = locations->InAt(1);
   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   // to avoid a SIGBUS.
   switch (size) {
     case Primitive::kPrimByte:
-      __ movb(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movb(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimShort:
-      __ movw(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movw(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimInt:
-      __ movl(Address(address, 0), value);
+      if (value.IsConstant()) {
+        __ movl(Address(address, 0),
+                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+      } else {
+        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     case Primitive::kPrimLong:
-      __ movq(Address(address, 0), value);
+      if (value.IsConstant()) {
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(Address(address, 0), Immediate(v_32));
+      } else {
+        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
+      }
       break;
     default:
       LOG(FATAL) << "Type not recognized for poke: " << size;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 4b9d4fc..bef5896 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -416,26 +416,6 @@
   DCHECK(!instruction->HasEnvironment());
 }
 
-void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
-  DCHECK(!cursor->IsPhi());
-  DCHECK(!instruction->IsPhi());
-  DCHECK_EQ(instruction->GetId(), -1);
-  DCHECK_NE(cursor->GetId(), -1);
-  DCHECK_EQ(cursor->GetBlock(), this);
-  DCHECK(!instruction->IsControlFlow());
-  instruction->next_ = cursor;
-  instruction->previous_ = cursor->previous_;
-  cursor->previous_ = instruction;
-  if (GetFirstInstruction() == cursor) {
-    instructions_.first_instruction_ = instruction;
-  } else {
-    instruction->previous_->next_ = instruction;
-  }
-  instruction->SetBlock(this);
-  instruction->SetId(GetGraph()->GetNextInstructionId());
-  UpdateInputsUsers(instruction);
-}
-
 void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                                   HInstruction* replacement) {
   DCHECK(initial->GetBlock() == this);
@@ -463,23 +443,27 @@
   Add(&phis_, this, phi);
 }
 
+void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());  // Phis are kept in phis_, not instructions_.
+  DCHECK(!instruction->IsPhi());
+  DCHECK_EQ(instruction->GetId(), -1);  // `instruction` must not have an id yet.
+  DCHECK_NE(cursor->GetId(), -1);  // `cursor` must already have one.
+  DCHECK_EQ(cursor->GetBlock(), this);  // `cursor` must belong to this block.
+  DCHECK(!instruction->IsControlFlow());
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());  // Fresh id from the graph.
+  UpdateInputsUsers(instruction);
+  instructions_.InsertInstructionBefore(instruction, cursor);  // Links it into the list.
+}
+
 void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
   DCHECK_EQ(phi->GetId(), -1);
   DCHECK_NE(cursor->GetId(), -1);
   DCHECK_EQ(cursor->GetBlock(), this);
-  if (cursor->next_ == nullptr) {
-    cursor->next_ = phi;
-    phi->previous_ = cursor;
-    DCHECK(phi->next_ == nullptr);
-  } else {
-    phi->next_ = cursor->next_;
-    phi->previous_ = cursor;
-    cursor->next_ = phi;
-    phi->next_->previous_ = phi;
-  }
   phi->SetBlock(this);
   phi->SetId(GetGraph()->GetNextInstructionId());
   UpdateInputsUsers(phi);
+  phis_.InsertInstructionAfter(phi, cursor);  // Also maintains phis_.last_instruction_.
 }
 
 static void Remove(HInstructionList* instruction_list,
@@ -546,6 +530,34 @@
   }
 }
 
+void HInstructionList::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(Contains(cursor));  // `cursor` must already be in this list.
+  if (cursor == first_instruction_) {  // Head insertion.
+    cursor->previous_ = instruction;
+    instruction->next_ = cursor;  // NOTE(review): previous_ is left as-is; presumably nullptr.
+    first_instruction_ = instruction;  // `instruction` becomes the new head.
+  } else {  // `cursor` is not the head; its predecessor is relinked below.
+    instruction->previous_ = cursor->previous_;
+    instruction->next_ = cursor;
+    cursor->previous_ = instruction;
+    instruction->previous_->next_ = instruction;  // Old predecessor now precedes `instruction`.
+  }
+}
+
+void HInstructionList::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(Contains(cursor));  // `cursor` must already be in this list.
+  if (cursor == last_instruction_) {  // Tail insertion.
+    cursor->next_ = instruction;
+    instruction->previous_ = cursor;  // NOTE(review): next_ is left as-is; presumably nullptr.
+    last_instruction_ = instruction;  // `instruction` becomes the new tail.
+  } else {  // `cursor` is not the tail; its successor is relinked below.
+    instruction->next_ = cursor->next_;
+    instruction->previous_ = cursor;
+    cursor->next_ = instruction;
+    instruction->next_->previous_ = instruction;  // Old successor now follows `instruction`.
+  }
+}
+
 void HInstructionList::RemoveInstruction(HInstruction* instruction) {
   if (instruction->previous_ != nullptr) {
     instruction->previous_->next_ = instruction->next_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 08fcdbb..1a24cb5 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -75,6 +75,10 @@
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
 
+  // Insert `instruction` before/after an existing instruction `cursor`.
+  void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
+
   // Return true if this list contains `instruction`.
   bool Contains(HInstruction* instruction) const;
 
@@ -467,8 +471,9 @@
   HInstruction* GetFirstInstruction() const { return instructions_.first_instruction_; }
   HInstruction* GetLastInstruction() const { return instructions_.last_instruction_; }
   const HInstructionList& GetInstructions() const { return instructions_; }
-  const HInstructionList& GetPhis() const { return phis_; }
   HInstruction* GetFirstPhi() const { return phis_.first_instruction_; }
+  HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
+  const HInstructionList& GetPhis() const { return phis_; }
 
   void AddSuccessor(HBasicBlock* block) {
     successors_.Add(block);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ab752c3..218894f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -321,7 +321,7 @@
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
   HDeadCodeElimination dce1(graph, stats);
-  HDeadCodeElimination dce2(graph, stats);
+  HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final");
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
   HBooleanSimplifier boolean_not(graph);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32204a9..0344f52 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -128,6 +128,16 @@
 }
 
 
+void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
+  CHECK(imm.is_int32());  // Only immediates that fit in 32 bits are accepted.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst);  // 64-bit operand size; REX bits come from the address operand.
+  EmitUint8(0xC7);  // Opcode byte; fault_handler_x86.cc decodes 0xc7 as "mov".
+  EmitOperand(0, dst);  // NOTE(review): 0 is presumably the opcode extension (/0) -- confirm.
+  EmitImmediate(imm);  // Immediate is emitted after the memory operand.
+}
+
+
 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
@@ -388,7 +398,7 @@
 
 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst);
+  EmitRex64(dst, src);  // Takes `src` too, so the address's REX bits are emitted.
   EmitUint8(0x63);
   EmitOperand(dst.LowBits(), src);
 }
@@ -652,6 +662,21 @@
 }
 
 
+void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Emitted ahead of the REX byte; cvtsi2sd uses 0xF2 here.
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);  // 32-bit integer source in memory.
+  }
+  EmitUint8(0x0F);
+  EmitUint8(0x2A);  // Same two opcode bytes as the Address overload of cvtsi2sd.
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
   cvtsi2sd(dst, src, false);
 }
@@ -672,6 +697,21 @@
 }
 
 
+void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);  // Emitted ahead of the REX byte; cvtsi2ss uses 0xF3 here.
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);  // 32-bit integer source in memory.
+  }
+  EmitUint8(0x0F);
+  EmitUint8(0x2A);  // Same two opcode bytes as the Address overload of cvtsi2ss.
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -692,6 +732,16 @@
 }
 
 
+void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // 0xF3 here vs 0xF2 in cvtsd2ss; the remaining bytes are the same.
+  EmitOptionalRex32(dst, src);  // Prefix byte above goes before the REX byte.
+  EmitUint8(0x0F);
+  EmitUint8(0x5A);
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF2);
@@ -752,6 +802,16 @@
 }
 
 
+void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);  // 0xF2 here vs 0xF3 in cvtss2sd; the remaining bytes are the same.
+  EmitOptionalRex32(dst, src);  // Prefix byte above goes before the REX byte.
+  EmitUint8(0x0F);
+  EmitUint8(0x5A);
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -771,6 +831,15 @@
 }
 
 
+void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(a, b);  // No 0x66 byte first, unlike comisd.
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);  // 0x2F for comis*, 0x2E for ucomis* in this file.
+  EmitOperand(a.LowBits(), b);  // `b` is the memory operand.
+}
+
+
 void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -780,6 +849,17 @@
   EmitXmmRegisterOperand(a.LowBits(), b);
 }
 
+
+void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Distinguishes this from comiss; goes before the REX byte.
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);  // 0x2F for comis*, 0x2E for ucomis* in this file.
+  EmitOperand(a.LowBits(), b);  // `b` is the memory operand.
+}
+
+
 void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(a, b);
@@ -789,6 +869,15 @@
 }
 
 
+void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(a, b);  // No 0x66 byte first, unlike ucomisd.
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);  // 0x2E for ucomis*, 0x2F for comis* in this file.
+  EmitOperand(a.LowBits(), b);  // `b` is the memory operand.
+}
+
+
 void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -799,6 +888,16 @@
 }
 
 
+void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Distinguishes this from ucomiss; goes before the REX byte.
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);  // 0x2E for ucomis*, 0x2F for comis* in this file.
+  EmitOperand(a.LowBits(), b);  // `b` is the memory operand.
+}
+
+
 void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1161,7 +1260,7 @@
 
 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(reg);
+  EmitRex64(reg, address);  // Takes `address` too, so its REX bits are emitted.
   EmitUint8(0x3B);
   EmitOperand(reg.LowBits(), address);
 }
@@ -1243,7 +1342,7 @@
 
 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(reg);
+  EmitRex64(reg, address);  // Takes `address` too, so its REX bits are emitted.
   EmitUint8(0x85);
   EmitOperand(reg.LowBits(), address);
 }
@@ -1288,6 +1387,14 @@
 }
 
 
+void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // NOTE(review): the header declares these as (reg, address).
+  EmitUint8(0x23);
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1327,6 +1434,14 @@
 }
 
 
+void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // NOTE(review): the header declares these as (reg, address).
+  EmitUint8(0x0B);
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1365,6 +1480,14 @@
   EmitComplex(6, Operand(dst), imm);
 }
 
+void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);  // NOTE(review): the header declares these as (reg, address).
+  EmitUint8(0x33);
+  EmitOperand(dst.LowBits(), src);  // Low bits of dst plus the memory operand.
+}
+
+
 #if 0
 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
   // REX.WRXB
@@ -1435,7 +1558,7 @@
 
 void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst);
+  EmitRex64(dst, address);  // Takes `address` too, so its REX bits are emitted.
   EmitUint8(0x03);
   EmitOperand(dst.LowBits(), address);
 }
@@ -1498,7 +1621,7 @@
 
 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(reg);
+  EmitRex64(reg, address);  // Takes `address` too, so its REX bits are emitted.
   EmitUint8(0x2B);
   EmitOperand(reg.LowBits() & 7, address);
 }
@@ -2182,9 +2305,15 @@
   if (dst.NeedsRex()) {
     rex |= 0x44;  // REX.0R00
   }
-  if (rex != 0) {
-    EmitUint8(rex);
+  EmitUint8(rex);
+}
+
+void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
+  uint8_t rex = 0x48 | operand.rex();  // REX.W000, merged with the operand's own REX bits.
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00
   }
+  EmitUint8(rex);  // Unconditional: rex always contains at least 0x48.
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 16ef70b..79ad8f5 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -328,6 +328,7 @@
   void movq(CpuRegister dst, const Address& src);
   void movl(CpuRegister dst, const Address& src);
   void movq(const Address& dst, CpuRegister src);
+  void movq(const Address& dst, const Immediate& src);
   void movl(const Address& dst, CpuRegister src);
   void movl(const Address& dst, const Immediate& imm);
 
@@ -391,14 +392,18 @@
 
   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
+  void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
+  void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
 
   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtss2sd(XmmRegister dst, XmmRegister src);
+  void cvtss2sd(XmmRegister dst, const Address& src);
 
   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtsd2ss(XmmRegister dst, XmmRegister src);
+  void cvtsd2ss(XmmRegister dst, const Address& src);
 
   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
@@ -408,9 +413,13 @@
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
+  void comiss(XmmRegister a, const Address& b);
   void comisd(XmmRegister a, XmmRegister b);
+  void comisd(XmmRegister a, const Address& b);
   void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, const Address& b);
   void ucomisd(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, const Address& b);
 
   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
@@ -487,18 +496,21 @@
   void andl(CpuRegister reg, const Address& address);
   void andq(CpuRegister dst, const Immediate& imm);
   void andq(CpuRegister dst, CpuRegister src);
+  void andq(CpuRegister reg, const Address& address);
 
   void orl(CpuRegister dst, const Immediate& imm);
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
   void orq(CpuRegister dst, const Immediate& imm);
+  void orq(CpuRegister reg, const Address& address);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
   void xorl(CpuRegister reg, const Address& address);
   void xorq(CpuRegister dst, const Immediate& imm);
   void xorq(CpuRegister dst, CpuRegister src);
+  void xorq(CpuRegister reg, const Address& address);
 
   void addl(CpuRegister dst, CpuRegister src);
   void addl(CpuRegister reg, const Immediate& imm);
@@ -789,6 +801,7 @@
   void EmitRex64(const Operand& operand);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
+  void EmitRex64(XmmRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, XmmRegister src);
 
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 5ca0373..9e4144a 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -667,6 +667,135 @@
   DriverStr(expected, "movw");
 }
 
+TEST_F(AssemblerX86_64Test, MovqAddrImm) {  // movq(Address, Immediate) overload.
+  GetAssembler()->movq(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
+                       x86_64::Immediate(-5));  // Negative immediate that fits in 32 bits.
+  const char* expected = "movq $-5, 0(%RAX)\n";
+  DriverStr(expected, "movq");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2ssAddr) {  // Address-source overload, both widths.
+  GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
+                           false);  // is64bit
+  GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
+                           true);  // is64bit
+  const char* expected = "cvtsi2ss 0(%RAX), %xmm0\n"
+                         "cvtsi2ssq 0(%RAX), %xmm0\n";  // 'q' form for is64bit == true.
+  DriverStr(expected, "cvtsi2ss");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2sdAddr) {  // Address-source overload, both widths.
+  GetAssembler()->cvtsi2sd(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
+                           false);  // is64bit
+  GetAssembler()->cvtsi2sd(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
+                           true);  // is64bit
+  const char* expected = "cvtsi2sd 0(%RAX), %xmm0\n"
+                         "cvtsi2sdq 0(%RAX), %xmm0\n";  // 'q' form for is64bit == true.
+  DriverStr(expected, "cvtsi2sd");
+}
+
+TEST_F(AssemblerX86_64Test, CmpqAddr) {  // R9/R12: presumably so both operands need REX bits.
+  GetAssembler()->cmpq(x86_64::CpuRegister(x86_64::R12),
+                       x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "cmpq 0(%R9), %R12\n";
+  DriverStr(expected, "cmpq");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxdAddr) {  // R9/R12: presumably so both operands need REX bits.
+  GetAssembler()->movsxd(x86_64::CpuRegister(x86_64::R12),
+                       x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "movslq 0(%R9), %R12\n";  // movsxd is spelled movslq in the expected text.
+  DriverStr(expected, "movsxd");
+}
+
+TEST_F(AssemblerX86_64Test, TestqAddr) {  // R9/R12: presumably so both operands need REX bits.
+  GetAssembler()->testq(x86_64::CpuRegister(x86_64::R12),
+                        x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "testq 0(%R9), %R12\n";
+  DriverStr(expected, "testq");
+}
+
+TEST_F(AssemblerX86_64Test, AddqAddr) {  // R9/R12: presumably so both operands need REX bits.
+  GetAssembler()->addq(x86_64::CpuRegister(x86_64::R12),
+                        x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "addq 0(%R9), %R12\n";
+  DriverStr(expected, "addq");
+}
+
+TEST_F(AssemblerX86_64Test, SubqAddr) {  // R9/R12: presumably so both operands need REX bits.
+  GetAssembler()->subq(x86_64::CpuRegister(x86_64::R12),
+                        x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "subq 0(%R9), %R12\n";
+  DriverStr(expected, "subq");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtss2sdAddr) {  // Address-source overload of cvtss2sd.
+  GetAssembler()->cvtss2sd(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "cvtss2sd 0(%RAX), %xmm0\n";
+  DriverStr(expected, "cvtss2sd");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsd2ssAddr) {  // Address-source overload of cvtsd2ss.
+  GetAssembler()->cvtsd2ss(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "cvtsd2ss 0(%RAX), %xmm0\n";
+  DriverStr(expected, "cvtsd2ss");
+}
+
+TEST_F(AssemblerX86_64Test, ComissAddr) {  // XMM14: presumably so the register needs a REX bit.
+  GetAssembler()->comiss(x86_64::XmmRegister(x86_64::XMM14),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "comiss 0(%RAX), %xmm14\n";
+  DriverStr(expected, "comiss");
+}
+
+TEST_F(AssemblerX86_64Test, ComisdAddr) {  // R9 base: presumably so the address needs a REX bit.
+  GetAssembler()->comisd(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
+  const char* expected = "comisd 0(%R9), %xmm0\n";
+  DriverStr(expected, "comisd");
+}
+
+TEST_F(AssemblerX86_64Test, UComissAddr) {  // Memory-operand overload of ucomiss.
+  GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "ucomiss 0(%RAX), %xmm0\n";
+  DriverStr(expected, "ucomiss");
+}
+
+TEST_F(AssemblerX86_64Test, UComisdAddr) {  // Memory-operand overload of ucomisd.
+  GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM0),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "ucomisd 0(%RAX), %xmm0\n";
+  DriverStr(expected, "ucomisd");
+}
+
+TEST_F(AssemblerX86_64Test, Andq) {  // Covers the (CpuRegister, Address) overload of andq.
+  GetAssembler()->andq(x86_64::CpuRegister(x86_64::R9),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "andq 0(%RAX), %r9\n";  // NOTE(review): lower-case %r9 (cf. %R9 above).
+  DriverStr(expected, "andq");
+}
+
+TEST_F(AssemblerX86_64Test, Orq) {  // Covers the (CpuRegister, Address) overload of orq.
+  GetAssembler()->orq(x86_64::CpuRegister(x86_64::R9),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "orq 0(%RAX), %r9\n";  // NOTE(review): lower-case %r9 (cf. %R9 above).
+  DriverStr(expected, "orq");
+}
+
+TEST_F(AssemblerX86_64Test, Xorq) {  // Covers the (CpuRegister, Address) overload of xorq.
+  GetAssembler()->xorq(x86_64::CpuRegister(x86_64::R9),
+                           x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
+  const char* expected = "xorq 0(%RAX), %r9\n";  // NOTE(review): lower-case %r9 (cf. %R9 above).
+  DriverStr(expected, "xorq");
+}
+
 TEST_F(AssemblerX86_64Test, Movsxd) {
   DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd");
 }
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 27a4adf..8712506 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -191,6 +191,7 @@
         break;
 
       case 0x81:        // group 1, word immediate.
+      case 0xc7:        // mov
         modrm = *pc++;
         has_modrm = true;
         immediate_size = operand_size_prefix ? 2 : 4;