48 files changed, 3718 insertions, 114 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index db338f0538..f2f4550f05 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -98,6 +98,8 @@ LIBART_COMPILER_SRC_FILES := \
 	optimizing/gvn.cc \
 	optimizing/inliner.cc \
 	optimizing/instruction_simplifier.cc \
+	optimizing/intrinsics.cc \
+	optimizing/intrinsics_x86_64.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimization.cc \
@@ -122,6 +124,8 @@ LIBART_COMPILER_SRC_FILES := \
 	utils/dwarf_cfi.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
+	utils/mips64/assembler_mips64.cc \
+	utils/mips64/managed_register_mips64.cc \
 	utils/x86/assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 	utils/x86_64/assembler_x86_64.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 7df71f5b8a..96f8e0c355 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -164,7 +164,7 @@ void CommonCompilerTest::SetUp() {
                                               compiler_kind, instruction_set,
                                               instruction_set_features_.get(),
                                               true, new std::set<std::string>, nullptr,
-                                              2, true, true, timer_.get(), -1, ""));
+                                              2, true, true, "", timer_.get(), -1, ""));
   }
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 234e8b96f6..22be28c4d9 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -63,6 +63,7 @@ size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
     case kArm:
     case kArm64:
     case kMips:
+    case kMips64:
     case kX86:
     case kX86_64:
       return 0;
@@ -82,6 +83,7 @@ const void* CompiledCode::CodePointer(const void* code_pointer,
     case kArm:
     case kArm64:
     case kMips:
+    case kMips64:
     case kX86:
     case kX86_64:
       return code_pointer;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 30398527cd..84c0d93008 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -293,9 +293,9 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods
     { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } }
 
     INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
-    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, kIntrinsicFlagToFloatingPoint),
     INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
-    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint),
 
     INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
     INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9985d66469..56bed39bf1 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -339,7 +339,8 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
                                const InstructionSetFeatures* instruction_set_features,
                                bool image, std::set<std::string>* image_classes,
                                std::set<std::string>* compiled_classes, size_t thread_count,
-                               bool dump_stats, bool dump_passes, CumulativeLogger* timer,
+                               bool dump_stats, bool dump_passes,
+                               const std::string& dump_cfg_file_name, CumulativeLogger* timer,
                                int swap_fd, const std::string& profile_file)
     : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
       swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())),
@@ -361,9 +362,10 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
+      dump_cfg_file_name_(dump_cfg_file_name),
       timings_logger_(timer),
       compiler_context_(nullptr),
-      support_boot_image_fixup_(instruction_set != kMips),
+      support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
       dedupe_code_("dedupe code", *swap_space_allocator_),
       dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_),
       dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_),
@@ -2094,6 +2096,7 @@ static bool InstructionSetHasGenericJniStub(InstructionSet isa) {
     case kArm64:
     case kThumb2:
     case kMips:
+    case kMips64:
     case kX86:
     case kX86_64: return true;
     default: return false;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 7ddc32cdd8..11b4329e32 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,6 +97,7 @@ class CompilerDriver {
                           bool image, std::set<std::string>* image_classes,
                           std::set<std::string>* compiled_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
+                          const std::string& dump_cfg_file_name,
                           CumulativeLogger* timer, int swap_fd,
                           const std::string& profile_file);
 
@@ -371,6 +372,10 @@ class CompilerDriver {
     return dump_passes_;
   }
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -542,6 +547,7 @@ class CompilerDriver {
 
   bool dump_stats_;
   const bool dump_passes_;
+  const std::string& dump_cfg_file_name_;
 
   CumulativeLogger* const timings_logger_;
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 273b62deee..94268de077 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -1108,6 +1108,14 @@ class ElfBuilder FINAL {
                                EF_MIPS_ARCH_32R2);
         break;
       }
+      case kMips64: {
+        elf_header_.e_machine = EM_MIPS;
+        elf_header_.e_flags = (EF_MIPS_NOREORDER |
+                               EF_MIPS_PIC       |
+                               EF_MIPS_CPIC      |
+                               EF_MIPS_ARCH_64R6);
+        break;
+      }
       default: {
         fatal_error_ = true;
         LOG(FATAL) << "Unknown instruction set: " << isa;
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 281e3fe109..f513ea8124 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -973,6 +973,9 @@ void Java_MyClassNatives_staticMethodThatShouldTakeClass(JNIEnv*, jclass, jclass
 }
 
 void JniCompilerTest::UpcallArgumentTypeChecking_InstanceImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldTakeClass));
 
@@ -985,6 +988,9 @@ void JniCompilerTest::UpcallArgumentTypeChecking_InstanceImpl() {
 JNI_TEST(UpcallArgumentTypeChecking_Instance)
 
 void JniCompilerTest::UpcallArgumentTypeChecking_StaticImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldTakeClass));
 
@@ -1475,6 +1481,9 @@ void JniCompilerTest::MaxParamNumberImpl() {
 JNI_TEST(MaxParamNumber)
 
 void JniCompilerTest::WithoutImplementationImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(false, "withoutImplementation", "()V", nullptr);
 
   env_->CallVoidMethod(jobj_, jmethod_);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b3ab370c99..d3d20555d8 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -92,7 +92,7 @@ TEST_F(OatTest, WriteRead) {
                                             method_inliner_map_.get(),
                                             compiler_kind, insn_set,
                                             insn_features.get(), false, nullptr, nullptr, 2, true,
-                                            true, timer_.get(), -1, ""));
+                                            true, "", timer_.get(), -1, ""));
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f6ca6c740e..9c2facb75e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
   HInvoke* invoke = nullptr;
   if (optimized_invoke_type == kVirtual) {
     invoke = new (arena_) HInvokeVirtual(
-        arena_, number_of_arguments, return_type, dex_pc, table_index);
+        arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
   } else if (optimized_invoke_type == kInterface) {
     invoke = new (arena_) HInvokeInterface(
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0c1ff9bff5..9e8907078b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
                      + GetGraph()->GetTemporariesVRegSlots()
                      + 1 /* filler */,
                    0, /* the baseline compiler does not have live registers at slow path */
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
 }
 
 void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
-                                     size_t maximum_number_of_live_registers,
+                                     size_t maximum_number_of_live_core_registers,
+                                     size_t maximum_number_of_live_fp_registers,
                                      size_t number_of_out_slots) {
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
 
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
-      + maximum_number_of_live_registers * GetWordSize()
+      + maximum_number_of_live_core_registers * GetWordSize()
+      + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 8d28f3da25..88e50b6c88 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -109,9 +109,11 @@ class CodeGenerator {
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
+  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
   virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
   void ComputeFrameSize(size_t number_of_spill_slots,
-                        size_t maximum_number_of_live_registers,
+                        size_t maximum_number_of_live_core_registers,
+                        size_t maximum_number_of_live_fp_registers,
                         size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1862061bcf..c4ba0fd3e5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1190,7 +1190,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
       kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
   // temp = temp[index_in_cache]
   __ LoadFromOffset(
-      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
+      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
   // LR = temp[offset_of_quick_compiled_code]
   __ LoadFromOffset(kLoadWord, LR, temp,
                      mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8b29b159ab..267d9a2cef 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -169,6 +169,11 @@ class CodeGeneratorARM : public CodeGenerator {
     return kArmWordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in S registers, which are word sized.
+    return kArmWordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b19f44e78..6d2c3de5d5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1975,7 +1975,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
   // Make sure that ArtMethod* is passed in W0 as per the calling convention
   DCHECK(temp.Is(w0));
   size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
-    invoke->GetIndexInDexCache() * kHeapRefSize;
+    invoke->GetDexMethodIndex() * kHeapRefSize;
 
   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index e4da07be43..590bc1d778 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -191,6 +191,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return kArm64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in D registers, which are word sized.
+    return kArm64WordSize;
+  }
+
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     vixl::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04e36cc58a..1a0df44ea6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1135,7 +1135,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
   // temp = temp->dex_cache_resolved_methods_;
   __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
   // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(
       temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acde122917..2d8adb2cf1 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -166,6 +166,11 @@ class CodeGeneratorX86 : public CodeGenerator {
     return kX86WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // 8 bytes == 2 words for each spill.
+    return 2 * kX86WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5fc24f71e6..3d7f122d36 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,8 @@
 
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86_64.h"
 #include "mirror/array-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/class.h"
@@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR
 
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
 
-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
-  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
 class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
@@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) {
   return kEqual;
 }
 
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     CpuRegister temp) {
+  // All registers are assumed to be correctly set up.
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
+
+  DCHECK(!IsLeafMethod());
+  RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -1123,30 +1136,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
-void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
+}
 
-  // temp = method;
-  codegen_->LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-  // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
-  // (temp + offset_of_quick_compiled_code)()
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
-  DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  codegen_->GenerateStaticOrDirectCall(
+      invoke,
+      invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
 }
 
 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -1182,10 +1196,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
 }
 
 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
   CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 87f6b0f779..c501568a89 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -36,6 +36,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] =
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
+static constexpr bool kCoalescedImplicitNullCheck = false;
+
 class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeDexCallingConvention() : CallingConvention(
@@ -67,7 +69,20 @@ class InvokeDexCallingConventionVisitor {
 };
 
 class CodeGeneratorX86_64;
-class SlowPathCodeX86_64;
+
+class SlowPathCodeX86_64 : public SlowPathCode {
+ public:
+  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
+
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+  Label entry_label_;
+  Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
+};
 
 class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
  public:
@@ -169,6 +184,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return kX86_64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -222,6 +241,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return false;
   }
 
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 493d93f052..532167c179 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -44,10 +44,10 @@ void HInliner::Run() {
          instr_it.Advance()) {
       HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect();
       if (current != nullptr) {
-        if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) {
+        if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) {
           if (kIsDebugBuild) {
             std::string callee_name =
-                PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile());
+                PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
             bool should_inline = callee_name.find("$inline$") != std::string::npos;
             CHECK(!should_inline) << "Could not inline " << callee_name;
           }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
new file mode 100644
index 0000000000..fe0e7f2eb2
--- /dev/null
+++ b/compiler/optimizing/intrinsics.cc
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
+#include "invoke_type.h"
+#include "nodes.h"
+#include "quick/inline_method_analyser.h"
+
+namespace art {
+
+// Function that returns whether an intrinsic is static/direct or virtual.
+static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kInterface;  // Non-sensical for intrinsic.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+    case Intrinsics::k ## Name:               \
+      return IsStatic;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kInterface;
+}
+
+
+
+static Primitive::Type GetType(uint64_t data, bool is_op_size) {
+  if (is_op_size) {
+    switch (static_cast<OpSize>(data)) {
+      case kSignedByte:
+        return Primitive::Type::kPrimByte;
+      case kSignedHalf:
+        return Primitive::Type::kPrimShort;
+      case k32:
+        return Primitive::Type::kPrimInt;
+      case k64:
+        return Primitive::Type::kPrimLong;
+      default:
+        LOG(FATAL) << "Unknown/unsupported op size " << data;
+        UNREACHABLE();
+    }
+  } else {
+    if ((data & kIntrinsicFlagIsLong) != 0) {
+      return Primitive::Type::kPrimLong;
+    }
+    if ((data & kIntrinsicFlagIsObject) != 0) {
+      return Primitive::Type::kPrimNot;
+    }
+    return Primitive::Type::kPrimInt;
+  }
+}
+
+static Intrinsics GetIntrinsic(InlineMethod method) {
+  switch (method.opcode) {
+    // Floating-point conversions.
+    case kIntrinsicDoubleCvt:
+      return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+          Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble;
+    case kIntrinsicFloatCvt:
+      return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+          Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
+
+    // Bit manipulations.
+    case kIntrinsicReverseBits:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kIntegerReverse;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kLongReverse;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    case kIntrinsicReverseBytes:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kShortReverseBytes;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kIntegerReverseBytes;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kLongReverseBytes;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // Abs.
+    case kIntrinsicAbsDouble:
+      return Intrinsics::kMathAbsDouble;
+    case kIntrinsicAbsFloat:
+      return Intrinsics::kMathAbsFloat;
+    case kIntrinsicAbsInt:
+      return Intrinsics::kMathAbsInt;
+    case kIntrinsicAbsLong:
+      return Intrinsics::kMathAbsLong;
+
+    // Min/max.
+    case kIntrinsicMinMaxDouble:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble;
+    case kIntrinsicMinMaxFloat:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat;
+    case kIntrinsicMinMaxInt:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt;
+    case kIntrinsicMinMaxLong:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
+
+    // Misc math.
+    case kIntrinsicSqrt:
+      return Intrinsics::kMathSqrt;
+    case kIntrinsicCeil:
+      return Intrinsics::kMathCeil;
+    case kIntrinsicFloor:
+      return Intrinsics::kMathFloor;
+    case kIntrinsicRint:
+      return Intrinsics::kMathRint;
+    case kIntrinsicRoundDouble:
+      return Intrinsics::kMathRoundDouble;
+    case kIntrinsicRoundFloat:
+      return Intrinsics::kMathRoundFloat;
+
+    // System.arraycopy.
+    case kIntrinsicSystemArrayCopyCharArray:
+      return Intrinsics::kSystemArrayCopyChar;
+
+    // Thread.currentThread.
+    case kIntrinsicCurrentThread:
+      return  Intrinsics::kThreadCurrentThread;
+
+    // Memory.peek.
+    case kIntrinsicPeek:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimByte:
+          return Intrinsics::kMemoryPeekByte;
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kMemoryPeekShortNative;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kMemoryPeekIntNative;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kMemoryPeekLongNative;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // Memory.poke.
+    case kIntrinsicPoke:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimByte:
+          return Intrinsics::kMemoryPokeByte;
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kMemoryPokeShortNative;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kMemoryPokeIntNative;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kMemoryPokeLongNative;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // String.
+    case kIntrinsicCharAt:
+      return Intrinsics::kStringCharAt;
+    case kIntrinsicCompareTo:
+      return Intrinsics::kStringCompareTo;
+    case kIntrinsicIsEmptyOrLength:
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
+    case kIntrinsicIndexOf:
+      return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
+          Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+
+    case kIntrinsicCas:
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimNot:
+          return Intrinsics::kUnsafeCASObject;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kUnsafeCASInt;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kUnsafeCASLong;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    case kIntrinsicUnsafeGet: {
+      const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimInt:
+          return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet;
+        case Primitive::Type::kPrimLong:
+          return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    }
+    case kIntrinsicUnsafePut: {
+      enum Sync { kNoSync, kVolatile, kOrdered };
+      const Sync sync =
+          ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile :
+          ((method.d.data & kIntrinsicFlagIsOrdered) != 0)  ? kOrdered :
+                                                              kNoSync;
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimInt:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePut;
+            case kVolatile:
+              return Intrinsics::kUnsafePutVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutOrdered;
+          }
+          break;
+        case Primitive::Type::kPrimLong:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePutLong;
+            case kVolatile:
+              return Intrinsics::kUnsafePutLongVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutLongOrdered;
+          }
+          break;
+        case Primitive::Type::kPrimNot:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePutObject;
+            case kVolatile:
+              return Intrinsics::kUnsafePutObjectVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutObjectOrdered;
+          }
+          break;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    }
+
+    // Virtual cases.
+
+    case kIntrinsicReferenceGetReferent:
+      return Intrinsics::kReferenceGetReferent;
+
+    // Quick inliner cases. Remove after refactoring. They are here so that we can use the
+    // compiler to warn on missing cases.
+
+    case kInlineOpNop:
+    case kInlineOpReturnArg:
+    case kInlineOpNonWideConst:
+    case kInlineOpIGet:
+    case kInlineOpIPut:
+      return Intrinsics::kNone;
+
+    // No default case to make the compiler warn on missing cases.
+  }
+  return Intrinsics::kNone;
+}
+
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
+  // The DexFileMethodInliner should have checked whether the methods are agreeing with
+  // what we expect, i.e., static methods are called as such. Add another check here for
+  // our expectations:
+  // Whenever the intrinsic is marked as static-or-direct, report an error if we find an
+  // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual
+  // functions that will perform a check inline. If the precise type is known, however,
+  // the instruction will be sharpened to an InvokeStaticOrDirect.
+  InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
+  InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
+      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->IsInvokeVirtual() ? kVirtual : kSuper;
+  switch (intrinsic_type) {
+    case kStatic:
+      return (invoke_type == kStatic);
+    case kDirect:
+      return (invoke_type == kDirect);
+    case kVirtual:
+      // Call might be devirtualized.
+      return (invoke_type == kVirtual || invoke_type == kDirect);
+
+    default:
+      return false;
+  }
+}
+
+// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
+void IntrinsicsRecognizer::Run() {
+  DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_);
+  DCHECK(inliner != nullptr);
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+         inst_it.Advance()) {
+      HInstruction* inst = inst_it.Current();
+      if (inst->IsInvoke()) {
+        HInvoke* invoke = inst->AsInvoke();
+        InlineMethod method;
+        if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
+          Intrinsics intrinsic = GetIntrinsic(method);
+
+          if (intrinsic != Intrinsics::kNone) {
+            if (!CheckInvokeType(intrinsic, invoke)) {
+              LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
+                           << intrinsic << " for "
+                           << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_);
+            } else {
+              invoke->SetIntrinsic(intrinsic);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
+  switch (intrinsic) {
+    case Intrinsics::kNone:
+      os << "No intrinsic.";
+      break;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+    case Intrinsics::k ## Name: \
+      os << # Name; \
+      break;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef STATIC_INTRINSICS_LIST
+#undef VIRTUAL_INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return os;
+}
+
+}  // namespace art
+
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
new file mode 100644
index 0000000000..29cc8efcc3
--- /dev/null
+++ b/compiler/optimizing/intrinsics.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+// Recognize intrinsics from HInvoke nodes.
+class IntrinsicsRecognizer : public HOptimization {
+ public:
+  IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+      : HOptimization(graph, true, "intrinsics_recognition"),
+        dex_file_(dex_file), driver_(driver) {}
+
+  void Run() OVERRIDE;
+
+ private:
+  const DexFile* dex_file_;
+  CompilerDriver* driver_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
+};
+
+class IntrinsicVisitor : public ValueObject {
+ public:
+  virtual ~IntrinsicVisitor() {}
+
+  // Dispatch logic.
+
+  void Dispatch(HInvoke* invoke) {
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kNone:
+        return;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+      case Intrinsics::k ## Name:             \
+        Visit ## Name(invoke);                \
+        return;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+      // Do not put a default case. That way the compiler will complain if we missed a case.
+    }
+  }
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)                    \
+  virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+  }
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ protected:
+  IntrinsicVisitor() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
new file mode 100644
index 0000000000..29ca20cca0
--- /dev/null
+++ b/compiler/optimizing/intrinsics_list.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+
+// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected
+// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual).
+
+#define INTRINSICS_LIST(V) \
+  V(DoubleDoubleToRawLongBits, kStatic) \
+  V(DoubleLongBitsToDouble, kStatic) \
+  V(FloatFloatToRawIntBits, kStatic) \
+  V(FloatIntBitsToFloat, kStatic) \
+  V(IntegerReverse, kStatic) \
+  V(IntegerReverseBytes, kStatic) \
+  V(LongReverse, kStatic) \
+  V(LongReverseBytes, kStatic) \
+  V(ShortReverseBytes, kStatic) \
+  V(MathAbsDouble, kStatic) \
+  V(MathAbsFloat, kStatic) \
+  V(MathAbsLong, kStatic) \
+  V(MathAbsInt, kStatic) \
+  V(MathMinDoubleDouble, kStatic) \
+  V(MathMinFloatFloat, kStatic) \
+  V(MathMinLongLong, kStatic) \
+  V(MathMinIntInt, kStatic) \
+  V(MathMaxDoubleDouble, kStatic) \
+  V(MathMaxFloatFloat, kStatic) \
+  V(MathMaxLongLong, kStatic) \
+  V(MathMaxIntInt, kStatic) \
+  V(MathSqrt, kStatic) \
+  V(MathCeil, kStatic) \
+  V(MathFloor, kStatic) \
+  V(MathRint, kStatic) \
+  V(MathRoundDouble, kStatic) \
+  V(MathRoundFloat, kStatic) \
+  V(SystemArrayCopyChar, kStatic) \
+  V(ThreadCurrentThread, kStatic) \
+  V(MemoryPeekByte, kStatic) \
+  V(MemoryPeekIntNative, kStatic) \
+  V(MemoryPeekLongNative, kStatic) \
+  V(MemoryPeekShortNative, kStatic) \
+  V(MemoryPokeByte, kStatic) \
+  V(MemoryPokeIntNative, kStatic) \
+  V(MemoryPokeLongNative, kStatic) \
+  V(MemoryPokeShortNative, kStatic) \
+  V(StringCharAt, kDirect) \
+  V(StringCompareTo, kDirect) \
+  V(StringIsEmpty, kDirect) \
+  V(StringIndexOf, kDirect) \
+  V(StringIndexOfAfter, kDirect) \
+  V(StringLength, kDirect) \
+  V(UnsafeCASInt, kDirect) \
+  V(UnsafeCASLong, kDirect) \
+  V(UnsafeCASObject, kDirect) \
+  V(UnsafeGet, kDirect) \
+  V(UnsafeGetVolatile, kDirect) \
+  V(UnsafeGetLong, kDirect) \
+  V(UnsafeGetLongVolatile, kDirect) \
+  V(UnsafePut, kDirect) \
+  V(UnsafePutOrdered, kDirect) \
+  V(UnsafePutVolatile, kDirect) \
+  V(UnsafePutObject, kDirect) \
+  V(UnsafePutObjectOrdered, kDirect) \
+  V(UnsafePutObjectVolatile, kDirect) \
+  V(UnsafePutLong, kDirect) \
+  V(UnsafePutLongOrdered, kDirect) \
+  V(UnsafePutLongVolatile, kDirect) \
+  \
+  V(ReferenceGetReferent, kVirtual)
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
new file mode 100644
index 0000000000..c1f4c94b7f
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -0,0 +1,984 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86_64.h"
+
+#include "code_generator_x86_64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/constants_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+static constexpr bool kIntrinsified = true;
+
+X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
+  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  const LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+// TODO: trg as memory.
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorX86_64* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movl(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movq(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected void type for valid location " << trg;
+      UNREACHABLE();
+
+    case Primitive::kPrimDouble: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movsd(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movss(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
+  if (invoke->InputCount() == 0) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+  // We're moving potentially two or more locations to locations that could overlap, so we need
+  // a parallel move resolver.
+  HParallelMove parallel_move(arena);
+
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+    Location actual_loc = locations->InAt(i);
+
+    parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr));
+  }
+
+  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
+    __ Bind(GetEntryLabel());
+
+    codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    codegen->RestoreLiveRegisters(invoke_->GetLocations());
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
+};
+
+#undef __
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type size,
+                            X86_64Assembler* assembler) {
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  switch (size) {
+    case Primitive::kPrimShort:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(out);
+      __ sarl(out, Immediate(16));
+      break;
+    case Primitive::kPrimInt:
+      __ bswapl(out);
+      break;
+    case Primitive::kPrimLong:
+      __ bswapq(out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+//       need is 64b.
+
+static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  // TODO: Enable memory operations when the assembler supports them.
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (output.IsFpuRegister()) {
+    // In-register
+    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+    if (is64bit) {
+      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    } else {
+      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    }
+  } else {
+    // TODO: update when assember support is available.
+    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+//  Once assembler support is available, in-memory operations look like this:
+//    if (is64bit) {
+//      DCHECK(output.IsDoubleStackSlot());
+//      // No 64b and with literal.
+//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
+//    } else {
+//      DCHECK(output.IsStackSlot());
+//      // Can use and with a literal directly.
+//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
+//    }
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister out = output.AsRegister<CpuRegister>();
+  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (is64bit) {
+    // Create mask.
+    __ movq(mask, out);
+    __ sarq(mask, Immediate(63));
+    // Add mask.
+    __ addq(out, mask);
+    __ xorq(out, mask);
+  } else {
+    // Create mask.
+    __ movl(mask, out);
+    __ sarl(mask, Immediate(31));
+    // Add mask.
+    __ addl(out, mask);
+    __ xorl(out, mask);
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+                        X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+  Location out_loc = locations->Out();
+  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    DCHECK(out_loc.Equals(op1_loc));
+    return;
+  }
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if Nan jmp Nan_label
+  //  if out is min jmp done
+  //  if op2 is min jmp op2_label
+  //  handle -0/+0
+  //  jmp done
+  // Nan_label:
+  //  out := NaN
+  // op2_label:
+  //  out := op2
+  // done:
+  //
+  // This removes one jmp, but needs to copy one input (op1) to out.
+  //
+  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+  Label nan, done, op2_label;
+  if (is_double) {
+    __ ucomisd(out, op2);
+  } else {
+    __ ucomiss(out, op2);
+  }
+
+  __ j(Condition::kParityEven, &nan);
+
+  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+  // Handle 0.0/-0.0.
+  if (is_min) {
+    if (is_double) {
+      __ orpd(out, op2);
+    } else {
+      __ orps(out, op2);
+    }
+  } else {
+    if (is_double) {
+      __ andpd(out, op2);
+    } else {
+      __ andps(out, op2);
+    }
+  }
+  __ jmp(&done);
+
+  // NaN handling.
+  __ Bind(&nan);
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
+  if (is_double) {
+    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+  } else {
+    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+  }
+  __ movd(out, cpu_temp, is_double);
+  __ jmp(&done);
+
+  // out := op2;
+  __ Bind(&op2_label);
+  if (is_double) {
+    __ movsd(out, op2);
+  } else {
+    __ movss(out, op2);
+  }
+
+  // Done.
+  __ Bind(&done);
+}
+
+static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+  // the second input to be the output (we can simply swap inputs).
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+                      X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    // Can return immediately, as op1_loc == out_loc.
+    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+    //       a copy here.
+    DCHECK(locations->Out().Equals(op1_loc));
+    return;
+  }
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if out is min jmp done
+  //  out := op2
+  // done:
+
+  if (is_long) {
+    __ cmpq(out, op2);
+  } else {
+    __ cmpl(out, op2);
+  }
+
+  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+  GetAssembler()->sqrtsd(out, in);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  // Starting offset within data array
+  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+  // Start of char data with array_
+  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we
+  //       move to (coalesced) implicit checks, we have to do a null check below.
+  DCHECK(!kCoalescedImplicitNullCheck);
+
+  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+  //       the cost.
+  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+  //       we will not optimize the code for constants (which would save a register).
+
+  SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  X86_64Assembler* assembler = GetAssembler();
+
+  __ cmpl(idx, Address(obj, count_offset));
+  __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+  // Get the actual element.
+  __ movl(temp, idx);                          // temp := idx.
+  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
+  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
+  // out = out[2*temp].
+  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movsxb(out, Address(address, 0));
+      break;
+    case Primitive::kPrimShort:
+      __ movsxw(out, Address(address, 0));
+      break;
+    case Primitive::kPrimInt:
+      __ movl(out, Address(address, 0));
+      break;
+    case Primitive::kPrimLong:
+      __ movq(out, Address(address, 0));
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for peek: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movb(Address(address, 0), value);
+      break;
+    case Primitive::kPrimShort:
+      __ movw(Address(address, 0), value);
+      break;
+    case Primitive::kPrimInt:
+      __ movl(Address(address, 0), value);
+      break;
+    case Primitive::kPrimLong:
+      __ movq(Address(address, 0), value);
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for poke: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+}
+
+static void GenUnsafeGet(LocationSummary* locations, bool is_long,
+                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+
+  if (is_long) {
+    __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+  } else {
+    // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object
+    //       pointer will entail an unpack operation.
+    __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+                                                       Primitive::Type type,
+                                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+
+// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
+// memory model.
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
+                         CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+  } else {
+    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+  }
+
+  if (is_volatile) {
+    __ mfence();
+  }
+
+  if (type == Primitive::kPrimNot) {
+    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+                        locations->GetTemp(1).AsRegister<CpuRegister>(),
+                        base,
+                        value);
+  }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
+void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                       \
+void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty)  // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength)   // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
new file mode 100644
index 0000000000..c1fa99c2dc
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace x86_64 {
+
+class CodeGeneratorX86_64;
+class X86_64Assembler;
+
+class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
+};
+
+class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  X86_64Assembler* GetAssembler();
+
+  ArenaAllocator* GetArena();
+
+  CodeGeneratorX86_64* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ed5e260a5b..9f2f9ece85 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -20,7 +20,9 @@
 
 namespace art {
 
-LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
+LocationSummary::LocationSummary(HInstruction* instruction,
+                                 CallKind call_kind,
+                                 bool intrinsified)
     : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
       temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
       environment_(instruction->GetBlock()->GetGraph()->GetArena(),
@@ -29,7 +31,8 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
-      live_registers_() {
+      live_registers_(),
+      intrinsified_(intrinsified) {
   inputs_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7df99d4b6f..d41b3aecfd 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -463,7 +463,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
     kCall
   };
 
-  LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
+  LocationSummary(HInstruction* instruction,
+                  CallKind call_kind = kNoCall,
+                  bool intrinsified = false);
 
   void SetInAt(uint32_t at, Location location) {
     DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid());
@@ -574,6 +576,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
     return output_overlaps_;
   }
 
+  bool Intrinsified() const {
+    return intrinsified_;
+  }
+
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
@@ -593,6 +599,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
   // Registers that are in use at this position.
   RegisterSet live_registers_;
 
+  // Whether these are locations for an intrinsified call.
+  const bool intrinsified_;
+
   ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
   ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b98bc70a9f..f2aa043849 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1580,19 +1580,18 @@ class HLongConstant : public HConstant {
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
+enum class Intrinsics {
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name,
+#include "intrinsics_list.h"
+  kNone,
+  INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+};
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
+
 class HInvoke : public HInstruction {
  public:
-  HInvoke(ArenaAllocator* arena,
-          uint32_t number_of_arguments,
-          Primitive::Type return_type,
-          uint32_t dex_pc)
-    : HInstruction(SideEffects::All()),
-      inputs_(arena, number_of_arguments),
-      return_type_(return_type),
-      dex_pc_(dex_pc) {
-    inputs_.SetSize(number_of_arguments);
-  }
-
   virtual size_t InputCount() const { return inputs_.Size(); }
   virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
 
@@ -1612,12 +1611,38 @@ class HInvoke : public HInstruction {
 
   uint32_t GetDexPc() const { return dex_pc_; }
 
+  uint32_t GetDexMethodIndex() const { return dex_method_index_; }
+
+  Intrinsics GetIntrinsic() {
+    return intrinsic_;
+  }
+
+  void SetIntrinsic(Intrinsics intrinsic) {
+    intrinsic_ = intrinsic;
+  }
+
   DECLARE_INSTRUCTION(Invoke);
 
  protected:
+  HInvoke(ArenaAllocator* arena,
+          uint32_t number_of_arguments,
+          Primitive::Type return_type,
+          uint32_t dex_pc,
+          uint32_t dex_method_index)
+    : HInstruction(SideEffects::All()),
+      inputs_(arena, number_of_arguments),
+      return_type_(return_type),
+      dex_pc_(dex_pc),
+      dex_method_index_(dex_method_index),
+      intrinsic_(Intrinsics::kNone) {
+    inputs_.SetSize(number_of_arguments);
+  }
+
   GrowableArray<HInstruction*> inputs_;
   const Primitive::Type return_type_;
   const uint32_t dex_pc_;
+  const uint32_t dex_method_index_;
+  Intrinsics intrinsic_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -1629,19 +1654,16 @@ class HInvokeStaticOrDirect : public HInvoke {
                         uint32_t number_of_arguments,
                         Primitive::Type return_type,
                         uint32_t dex_pc,
-                        uint32_t index_in_dex_cache,
+                        uint32_t dex_method_index,
                         InvokeType invoke_type)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
-        index_in_dex_cache_(index_in_dex_cache),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         invoke_type_(invoke_type) {}
 
-  uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
   InvokeType GetInvokeType() const { return invoke_type_; }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  private:
-  const uint32_t index_in_dex_cache_;
   const InvokeType invoke_type_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
@@ -1653,8 +1675,9 @@ class HInvokeVirtual : public HInvoke {
                  uint32_t number_of_arguments,
                  Primitive::Type return_type,
                  uint32_t dex_pc,
+                 uint32_t dex_method_index,
                  uint32_t vtable_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         vtable_index_(vtable_index) {}
 
   uint32_t GetVTableIndex() const { return vtable_index_; }
@@ -1675,8 +1698,7 @@ class HInvokeInterface : public HInvoke {
                    uint32_t dex_pc,
                    uint32_t dex_method_index,
                    uint32_t imt_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
-        dex_method_index_(dex_method_index),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         imt_index_(imt_index) {}
 
   uint32_t GetImtIndex() const { return imt_index_; }
@@ -1685,7 +1707,6 @@ class HInvokeInterface : public HInvoke {
   DECLARE_INSTRUCTION(InvokeInterface);
 
  private:
-  const uint32_t dex_method_index_;
   const uint32_t imt_index_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
@@ -2775,10 +2796,16 @@ class HParallelMove : public HTemplateInstruction<0> {
       : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {}
 
   void AddMove(MoveOperands* move) {
-    if (kIsDebugBuild && move->GetInstruction() != nullptr) {
+    if (kIsDebugBuild) {
+      if (move->GetInstruction() != nullptr) {
+        for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+          DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction())
+            << "Doing parallel moves for the same instruction.";
+        }
+      }
       for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
-        DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction())
-          << "Doing parallel moves for the same instruction.";
+        DCHECK(!move->GetDestination().Contains(moves_.Get(i)->GetDestination()))
+            << "Same destination for two moves in a parallel move.";
       }
     }
     moves_.Add(move);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 692d452f54..605637300f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 #include "compiler.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "elf_writer_quick.h"
@@ -32,6 +33,7 @@
 #include "gvn.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "intrinsics.h"
 #include "jni/quick/jni_compiler.h"
 #include "mirror/art_method-inl.h"
 #include "nodes.h"
@@ -149,11 +151,12 @@ void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  if (driver->GetDumpPasses()) {
+  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
-    visualizer_output_.reset(new std::ofstream("art.cfg"));
+    visualizer_output_.reset(new std::ofstream(cfg_file_name));
   }
 }
 
@@ -214,9 +217,12 @@ static void RunOptimizations(HGraph* graph,
   BoundsCheckElimination bce(graph);
   InstructionSimplifier simplify2(graph);
 
+  IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
+
   HOptimization* optimizations[] = {
     &redundant_phi,
     &dead_phi,
+    &intrinsics,
     &dce,
     &fold,
     &simplify1,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 04b56345c4..b3eb1e2d51 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -45,8 +45,12 @@ namespace art {
 LiveInterval* BuildInterval(const size_t ranges[][2],
                             size_t number_of_ranges,
                             ArenaAllocator* allocator,
-                            int reg = -1) {
-  LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt);
+                            int reg = -1,
+                            HInstruction* defined_by = nullptr) {
+  LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by);
+  if (defined_by != nullptr) {
+    defined_by->SetLiveInterval(interval);
+  }
   for (size_t i = number_of_ranges; i > 0; --i) {
     interval->AddRange(ranges[i - 1][0], ranges[i - 1][1]);
   }
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 210f7d7f09..7ab41b6ddc 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -120,16 +120,9 @@ TEST(ParallelMoveTest, Swap) {
 
   {
     TestParallelMoveResolver resolver(&allocator);
-    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}};
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}};
     resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
-    ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str());
-  }
-
-  {
-    TestParallelMoveResolver resolver(&allocator);
-    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}};
-    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
-    ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str());
+    ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
   }
 }
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1efc52b9ec..93ed44ee44 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -56,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
         blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
         blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
         reserved_out_slots_(0),
-        maximum_number_of_live_registers_(0) {
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
   codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
   physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
@@ -185,9 +186,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
   }
   LinearScan();
 
-  size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
-  maximum_number_of_live_registers_ = 0;
-
   inactive_.Reset();
   active_.Reset();
   handled_.Reset();
@@ -207,7 +205,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
     }
   }
   LinearScan();
-  maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
 }
 
 void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -602,8 +599,13 @@ void RegisterAllocator::LinearScan() {
     if (current->IsSlowPathSafepoint()) {
       // Synthesized interval to record the maximum number of live registers
       // at safepoints. No need to allocate a register for it.
-      maximum_number_of_live_registers_ =
-          std::max(maximum_number_of_live_registers_, active_.Size());
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.Size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.Size());
+      }
       DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
       continue;
     }
@@ -855,7 +857,9 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
         DCHECK(!active->IsFixed());
         LiveInterval* split = Split(active, current->GetStart());
         active_.DeleteAt(i);
-        handled_.Add(active);
+        if (split != active) {
+          handled_.Add(active);
+        }
         AddSorted(unhandled_, split);
         break;
       }
@@ -876,9 +880,14 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
         if (next_intersection != kNoLifetime) {
           if (inactive->IsFixed()) {
             LiveInterval* split = Split(current, next_intersection);
+            DCHECK_NE(split, current);
             AddSorted(unhandled_, split);
           } else {
-            LiveInterval* split = Split(inactive, next_intersection);
+            // Split at the start of `current`, which will lead to splitting
+            // at the end of the lifetime hole of `inactive`.
+            LiveInterval* split = Split(inactive, current->GetStart());
+            // If it's inactive, it must start before the current interval.
+            DCHECK_NE(split, inactive);
             inactive_.DeleteAt(i);
             --i;
             --e;
@@ -1215,10 +1224,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
         locations->SetEnvironmentAt(use->GetInputIndex(), source);
       } else {
         Location expected_location = locations->InAt(use->GetInputIndex());
-        if (expected_location.IsUnallocated()) {
-          locations->SetInAt(use->GetInputIndex(), source);
-        } else if (!expected_location.IsConstant()) {
-          AddInputMoveFor(use->GetUser(), source, expected_location);
+        // The expected (actual) location may be invalid in case the input is unused. Currently
+        // this only happens for intrinsics.
+        if (expected_location.IsValid()) {
+          if (expected_location.IsUnallocated()) {
+            locations->SetInAt(use->GetInputIndex(), source);
+          } else if (!expected_location.IsConstant()) {
+            AddInputMoveFor(use->GetUser(), source, expected_location);
+          }
+        } else {
+          DCHECK(use->GetUser()->IsInvoke());
+          DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
         }
       }
       use = use->GetNext();
@@ -1255,8 +1271,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
       switch (source.GetKind()) {
         case Location::kRegister: {
           locations->AddLiveRegister(source);
-          DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
-
+          DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                    maximum_number_of_live_core_registers_ +
+                    maximum_number_of_live_fp_registers_);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
@@ -1349,7 +1366,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
 
 void RegisterAllocator::Resolve() {
   codegen_->ComputeFrameSize(
-      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
+      spill_slots_.Size(), maximum_number_of_live_core_registers_,
+      maximum_number_of_live_fp_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index c152a8bf67..ec46a776b5 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -190,10 +190,14 @@ class RegisterAllocator {
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
-  // The maximum live registers at safepoints.
-  size_t maximum_number_of_live_registers_;
+  // The maximum live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
 
   ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
 
   DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
 };
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index c2ea80ec33..0948643355 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -738,4 +738,106 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
   }
 }
 
+// Test a bug in the register allocator, where allocating a blocked
+// register would lead to spilling an inactive interval at the wrong
+// position.
+TEST(RegisterAllocatorTest, SpillInactive) {
+  ArenaPool pool;
+
+  // Create a synthesized graph to please the register_allocator and
+  // ssa_liveness_analysis code.
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  entry->AddInstruction(one);
+  entry->AddInstruction(two);
+  entry->AddInstruction(three);
+  entry->AddInstruction(four);
+
+  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+  block->AddInstruction(new (&allocator) HExit());
+
+  // We create a synthesized user requesting a register, to avoid just spilling the
+  // intervals.
+  HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt);
+  user->AddInput(one);
+  user->SetBlock(block);
+  LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  static constexpr size_t phi_ranges[][2] = {{20, 30}};
+  BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user);
+
+  // Create an interval with lifetime holes.
+  static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
+  LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
+  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+
+  locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+  first = first->SplitAt(1);
+
+  // Create an interval that conflicts with the next interval, to force the next
+  // interval to call `AllocateBlockedReg`.
+  static constexpr size_t ranges2[][2] = {{2, 4}};
+  LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two);
+  locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+
+  // Create an interval that will lead to splitting the first interval. The bug occured
+  // by splitting at a wrong position, in this case at the next intersection between
+  // this interval and the first interval. We would have then put the interval with ranges
+  // "[0, 2(, [4, 6(" in the list of handled intervals, even though we haven't processed intervals
+  // before lifetime position 6 yet.
+  static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
+  LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
+  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+  locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+  third = third->SplitAt(3);
+
+  // Because the first part of the split interval was considered handled, this interval
+  // was free to allocate the same register, even though it conflicts with it.
+  static constexpr size_t ranges4[][2] = {{4, 6}};
+  LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four);
+  locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+
+  x86::CodeGeneratorX86 codegen(graph);
+  SsaLivenessAnalysis liveness(*graph, &codegen);
+
+  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  register_allocator.unhandled_core_intervals_.Add(fourth);
+  register_allocator.unhandled_core_intervals_.Add(third);
+  register_allocator.unhandled_core_intervals_.Add(second);
+  register_allocator.unhandled_core_intervals_.Add(first);
+
+  // Set just one register available to make all intervals compete for the same.
+  register_allocator.number_of_registers_ = 1;
+  register_allocator.registers_array_ = allocator.AllocArray<size_t>(1);
+  register_allocator.processing_core_registers_ = true;
+  register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
+  register_allocator.LinearScan();
+
+  // Test that there is no conflicts between intervals.
+  GrowableArray<LiveInterval*> intervals(&allocator, 0);
+  intervals.Add(first);
+  intervals.Add(second);
+  intervals.Add(third);
+  intervals.Add(fourth);
+  ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+      intervals, 0, 0, codegen, &allocator, true, false));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 74611e1cbb..b632c4d05a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -429,7 +429,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
-    // a two ranges in between this position (that is, the position is in a lifetime hole).
+    // two ranges in between this position (that is, the position is in a lifetime hole).
     do {
       if (position >= current->GetEnd()) {
         // Move to next range.
@@ -653,6 +653,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
   static constexpr int kNoRegister = -1;
   static constexpr int kNoSpillSlot = -1;
 
+  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
   DISALLOW_COPY_AND_ASSIGN(LiveInterval);
 };
 
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 385d1340fc..cb51ed8fc8 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -20,6 +20,7 @@
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm64/assembler_arm64.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/mips64/assembler_mips64.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86_64/assembler_x86_64.h"
 
@@ -120,6 +121,35 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention
 }
 }  // namespace mips
 
+namespace mips64 {
+static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
+                                                    ThreadOffset<8> offset) {
+  std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64)));
+
+  switch (abi) {
+    case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
+      __ LoadFromOffset(kLoadDoubleword, T9, A0, offset.Int32Value());
+      break;
+    case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
+      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadDoubleword, T9, T9, offset.Int32Value());
+      break;
+    case kQuickAbi:  // Fall-through.
+      __ LoadFromOffset(kLoadDoubleword, T9, S1, offset.Int32Value());
+  }
+  __ Jr(T9);
+  __ Nop();
+  __ Break();
+
+  size_t cs = assembler->CodeSize();
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
+  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  assembler->FinalizeInstructions(code);
+
+  return entry_stub.release();
+}
+}  // namespace mips64
+
 namespace x86 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
   std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
@@ -160,6 +190,8 @@ const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCal
   switch (isa) {
     case kArm64:
       return arm64::CreateTrampoline(abi, offset);
+    case kMips64:
+      return mips64::CreateTrampoline(abi, offset);
     case kX86_64:
       return x86_64::CreateTrampoline(offset);
     default:
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 68345129c3..5340dd3a25 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -23,6 +23,7 @@
 #include "arm/assembler_thumb2.h"
 #include "arm64/assembler_arm64.h"
 #include "mips/assembler_mips.h"
+#include "mips64/assembler_mips64.h"
 #include "x86/assembler_x86.h"
 #include "x86_64/assembler_x86_64.h"
 #include "globals.h"
@@ -115,6 +116,8 @@ Assembler* Assembler::Create(InstructionSet instruction_set) {
       return new arm64::Arm64Assembler();
     case kMips:
       return new mips::MipsAssembler();
+    case kMips64:
+      return new mips64::Mips64Assembler();
     case kX86:
       return new x86::X86Assembler();
     case kX86_64:
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 134dda4b2c..923ecdbd9d 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -47,6 +47,9 @@ namespace arm64 {
 namespace mips {
   class MipsAssembler;
 }
+namespace mips64 {
+  class Mips64Assembler;
+}
 namespace x86 {
   class X86Assembler;
 }
@@ -120,6 +123,7 @@ class Label {
   friend class arm::Thumb2Assembler;
   friend class arm64::Arm64Assembler;
   friend class mips::MipsAssembler;
+  friend class mips64::Mips64Assembler;
   friend class x86::X86Assembler;
   friend class x86_64::X86_64Assembler;
 
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index bfb2829a32..bb62bca3b9 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -30,6 +30,9 @@ class Arm64ManagedRegister;
 namespace mips {
 class MipsManagedRegister;
 }
+namespace mips64 {
+class Mips64ManagedRegister;
+}
 
 namespace x86 {
 class X86ManagedRegister;
@@ -54,6 +57,7 @@ class ManagedRegister {
   arm::ArmManagedRegister AsArm() const;
   arm64::Arm64ManagedRegister AsArm64() const;
   mips::MipsManagedRegister AsMips() const;
+  mips64::Mips64ManagedRegister AsMips64() const;
   x86::X86ManagedRegister AsX86() const;
   x86_64::X86_64ManagedRegister AsX86_64() const;
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
new file mode 100644
index 0000000000..233ae7db3c
--- /dev/null
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -0,0 +1,1036 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips64.h"
+
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "memory_region.h"
+#include "thread.h"
+
+namespace art {
+namespace mips64 {
+
+void Mips64Assembler::Emit(int32_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int32_t>(value);
+}
+
+void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
+                            int shamt, int funct) {
+  CHECK_NE(rs, kNoGpuRegister);
+  CHECK_NE(rt, kNoGpuRegister);
+  CHECK_NE(rd, kNoGpuRegister);
+  int32_t encoding = opcode << kOpcodeShift |
+                     static_cast<int32_t>(rs) << kRsShift |
+                     static_cast<int32_t>(rt) << kRtShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     shamt << kShamtShift |
+                     funct;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm) {
+  CHECK_NE(rs, kNoGpuRegister);
+  CHECK_NE(rt, kNoGpuRegister);
+  int32_t encoding = opcode << kOpcodeShift |
+                     static_cast<int32_t>(rs) << kRsShift |
+                     static_cast<int32_t>(rt) << kRtShift |
+                     imm;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitJ(int opcode, int address) {
+  int32_t encoding = opcode << kOpcodeShift |
+                     address;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd,
+int funct) {
+  CHECK_NE(ft, kNoFpuRegister);
+  CHECK_NE(fs, kNoFpuRegister);
+  CHECK_NE(fd, kNoFpuRegister);
+  int32_t encoding = opcode << kOpcodeShift |
+                     fmt << kFmtShift |
+                     static_cast<int32_t>(ft) << kFtShift |
+                     static_cast<int32_t>(fs) << kFsShift |
+                     static_cast<int32_t>(fd) << kFdShift |
+                     funct;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm) {
+  CHECK_NE(rt, kNoFpuRegister);
+  int32_t encoding = opcode << kOpcodeShift |
+                     fmt << kFmtShift |
+                     static_cast<int32_t>(rt) << kRtShift |
+                     imm;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal) {
+  int offset;
+  if (label->IsBound()) {
+    offset = label->Position() - buffer_.Size();
+  } else {
+    // Use the offset field of the branch instruction for linking the sites.
+    offset = label->position_;
+    label->LinkTo(buffer_.Size());
+  }
+  if (equal) {
+    Beq(rt, rs, (offset >> 2) & kBranchOffsetMask);
+  } else {
+    Bne(rt, rs, (offset >> 2) & kBranchOffsetMask);
+  }
+}
+
+void Mips64Assembler::EmitJump(Label* label, bool link) {
+  int offset;
+  if (label->IsBound()) {
+    offset = label->Position() - buffer_.Size();
+  } else {
+    // Use the offset field of the jump instruction for linking the sites.
+    offset = label->position_;
+    label->LinkTo(buffer_.Size());
+  }
+  if (link) {
+    Jal((offset >> 2) & kJumpOffsetMask);
+  } else {
+    J((offset >> 2) & kJumpOffsetMask);
+  }
+}
+
+int32_t Mips64Assembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
+  CHECK_ALIGNED(offset, 4);
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
+
+  // Properly preserve only the bits supported in the instruction.
+  offset >>= 2;
+  if (is_jump) {
+    offset &= kJumpOffsetMask;
+    return (inst & ~kJumpOffsetMask) | offset;
+  } else {
+    offset &= kBranchOffsetMask;
+    return (inst & ~kBranchOffsetMask) | offset;
+  }
+}
+
+int Mips64Assembler::DecodeBranchOffset(int32_t inst, bool is_jump) {
+  // Sign-extend, then left-shift by 2.
+  if (is_jump) {
+    return (((inst & kJumpOffsetMask) << 6) >> 4);
+  } else {
+    return (((inst & kBranchOffsetMask) << 16) >> 14);
+  }
+}
+
+void Mips64Assembler::Bind(Label* label, bool is_jump) {
+  CHECK(!label->IsBound());
+  int bound_pc = buffer_.Size();
+  while (label->IsLinked()) {
+    int32_t position = label->Position();
+    int32_t next = buffer_.Load<int32_t>(position);
+    int32_t offset = is_jump ? bound_pc - position : bound_pc - position - 4;
+    int32_t encoded = Mips64Assembler::EncodeBranchOffset(offset, next, is_jump);
+    buffer_.Store<int32_t>(position, encoded);
+    label->position_ = Mips64Assembler::DecodeBranchOffset(next, is_jump);
+  }
+  label->BindTo(bound_pc);
+}
+
+void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x20);
+}
+
+void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x8, rs, rt, imm16);
+}
+
+void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x21);
+}
+
+void Mips64Assembler::Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x9, rs, rt, imm16);
+}
+
+void Mips64Assembler::Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x19, rs, rt, imm16);
+}
+
+void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x22);
+}
+
+void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x23);
+}
+
+void Mips64Assembler::Mult(GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
+}
+
+void Mips64Assembler::Multu(GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
+}
+
+void Mips64Assembler::Div(GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
+}
+
+void Mips64Assembler::Divu(GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
+}
+
+void Mips64Assembler::And(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x24);
+}
+
+void Mips64Assembler::Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0xc, rs, rt, imm16);
+}
+
+void Mips64Assembler::Or(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x25);
+}
+
+void Mips64Assembler::Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0xd, rs, rt, imm16);
+}
+
+void Mips64Assembler::Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x26);
+}
+
+void Mips64Assembler::Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0xe, rs, rt, imm16);
+}
+
+void Mips64Assembler::Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x27);
+}
+
+void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rs, int shamt) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x00);
+}
+
+void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rs, int shamt) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x02);
+}
+
+void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rs, int shamt) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x03);
+}
+
+void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x04);
+}
+
+void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x06);
+}
+
+void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x07);
+}
+
+void Mips64Assembler::Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x20, rs, rt, imm16);
+}
+
+void Mips64Assembler::Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x21, rs, rt, imm16);
+}
+
+void Mips64Assembler::Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x23, rs, rt, imm16);
+}
+
+void Mips64Assembler::Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x37, rs, rt, imm16);
+}
+
+void Mips64Assembler::Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x24, rs, rt, imm16);
+}
+
+void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x25, rs, rt, imm16);
+}
+
+void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) {
+  EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
+}
+
+void Mips64Assembler::Mfhi(GpuRegister rd) {
+  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
+}
+
+void Mips64Assembler::Mflo(GpuRegister rd) {
+  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
+}
+
+void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x28, rs, rt, imm16);
+}
+
+void Mips64Assembler::Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x29, rs, rt, imm16);
+}
+
+void Mips64Assembler::Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x2b, rs, rt, imm16);
+}
+
+void Mips64Assembler::Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x3f, rs, rt, imm16);
+}
+
+void Mips64Assembler::Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x2a);
+}
+
+void Mips64Assembler::Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x2b);
+}
+
+void Mips64Assembler::Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0xa, rs, rt, imm16);
+}
+
+void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0xb, rs, rt, imm16);
+}
+
+void Mips64Assembler::Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x4, rs, rt, imm16);
+  Nop();
+}
+
+void Mips64Assembler::Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x5, rs, rt, imm16);
+  Nop();
+}
+
+void Mips64Assembler::J(uint32_t address) {
+  EmitJ(0x2, address);
+  Nop();
+}
+
+void Mips64Assembler::Jal(uint32_t address) {
+  EmitJ(0x2, address);
+  Nop();
+}
+
+void Mips64Assembler::Jr(GpuRegister rs) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), 0, 0x09);  // Jalr zero, rs
+  Nop();
+}
+
+void Mips64Assembler::Jalr(GpuRegister rs) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), RA, 0, 0x09);
+  Nop();
+}
+
+void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
+}
+
+void Mips64Assembler::SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x1);
+}
+
+void Mips64Assembler::MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x2);
+}
+
+void Mips64Assembler::DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x3);
+}
+
+void Mips64Assembler::AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
+         static_cast<FpuRegister>(fd), 0x0);
+}
+
+void Mips64Assembler::SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
+         static_cast<FpuRegister>(fd), 0x1);
+}
+
+void Mips64Assembler::MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
+         static_cast<FpuRegister>(fd), 0x2);
+}
+
+void Mips64Assembler::DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
+         static_cast<FpuRegister>(fd), 0x3);
+}
+
+void Mips64Assembler::MovS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x6);
+}
+
+void Mips64Assembler::MovD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), static_cast<FpuRegister>(fs),
+         static_cast<FpuRegister>(fd), 0x6);
+}
+
+void Mips64Assembler::Mfc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
+void Mips64Assembler::Mtc1(FpuRegister ft, GpuRegister rs) {
+  EmitFR(0x11, 0x04, ft, static_cast<FpuRegister>(rs), static_cast<FpuRegister>(0), 0x0);
+}
+
+void Mips64Assembler::Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x31, rs, static_cast<GpuRegister>(ft), imm16);
+}
+
+void Mips64Assembler::Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x35, rs, static_cast<GpuRegister>(ft), imm16);
+}
+
+void Mips64Assembler::Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x39, rs, static_cast<GpuRegister>(ft), imm16);
+}
+
+void Mips64Assembler::Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x3d, rs, static_cast<GpuRegister>(ft), imm16);
+}
+
+void Mips64Assembler::Break() {
+  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0),
+        static_cast<GpuRegister>(0), 0, 0xD);
+}
+
+void Mips64Assembler::Nop() {
+  EmitR(0x0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0),
+        static_cast<GpuRegister>(0), 0, 0x0);
+}
+
+void Mips64Assembler::Move(GpuRegister rt, GpuRegister rs) {
+  EmitI(0x19, rs, rt, 0);   // Daddiu
+}
+
+void Mips64Assembler::Clear(GpuRegister rt) {
+  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rt, 0, 0x20);
+}
+
+void Mips64Assembler::Not(GpuRegister rt, GpuRegister rs) {
+  EmitR(0, static_cast<GpuRegister>(0), rs, rt, 0, 0x27);
+}
+
+void Mips64Assembler::Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  Mult(rs, rt);
+  Mflo(rd);
+}
+
+void Mips64Assembler::Div(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  Div(rs, rt);
+  Mflo(rd);
+}
+
+void Mips64Assembler::Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  Div(rs, rt);
+  Mfhi(rd);
+}
+
+void Mips64Assembler::AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value) {
+  CHECK((value >= -32768) && (value <= 32766));
+  Daddiu(rt, rs, value);
+}
+
+void Mips64Assembler::LoadImmediate64(GpuRegister rt, int32_t value) {
+  CHECK((value >= -32768) && (value <= 32766));
+  Daddiu(rt, ZERO, value);
+}
+
+void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
+                                     int32_t offset) {
+  switch (type) {
+    case kLoadSignedByte:
+      Lb(reg, base, offset);
+      break;
+    case kLoadUnsignedByte:
+      Lbu(reg, base, offset);
+      break;
+    case kLoadSignedHalfword:
+      Lh(reg, base, offset);
+      break;
+    case kLoadUnsignedHalfword:
+      Lhu(reg, base, offset);
+      break;
+    case kLoadWord:
+      Lw(reg, base, offset);
+      break;
+    case kLoadDoubleword:
+      // TODO: alignment issues ???
+      Ld(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
+                                        int32_t offset) {
+  CHECK((offset >= -32768) && (offset <= 32766));
+  switch (type) {
+    case kLoadWord:
+      Lwc1(reg, base, offset);
+      break;
+    case kLoadDoubleword:
+      // TODO: alignment issues ???
+      Ldc1(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset,
+                               size_t size) {
+  Mips64ManagedRegister dst = m_dst.AsMips64();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsGpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size) << dst;
+      LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset);
+    } else if (size == 8) {
+      CHECK_EQ(8u, size) << dst;
+      LoadFromOffset(kLoadDoubleword, dst.AsGpuRegister(), src_register, src_offset);
+    } else {
+      UNIMPLEMENTED(FATAL) << "We only support Load() of size 4 and 8";
+    }
+  } else if (dst.IsFpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size) << dst;
+      LoadFpuFromOffset(kLoadWord, dst.AsFpuRegister(), src_register, src_offset);
+    } else if (size == 8) {
+      CHECK_EQ(8u, size) << dst;
+      LoadFpuFromOffset(kLoadDoubleword, dst.AsFpuRegister(), src_register, src_offset);
+    } else {
+      UNIMPLEMENTED(FATAL) << "We only support Load() of size 4 and 8";
+    }
+  }
+}
+
+void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
+                                    int32_t offset) {
+  switch (type) {
+    case kStoreByte:
+      Sb(reg, base, offset);
+      break;
+    case kStoreHalfword:
+      Sh(reg, base, offset);
+      break;
+    case kStoreWord:
+      Sw(reg, base, offset);
+      break;
+    case kStoreDoubleword:
+      // TODO: alignment issues ???
+      Sd(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
+                                       int32_t offset) {
+  switch (type) {
+    case kStoreWord:
+      Swc1(reg, base, offset);
+      break;
+    case kStoreDoubleword:
+      Sdc1(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+constexpr size_t kFramePointerSize = 8;
+
+void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                                 const std::vector<ManagedRegister>& callee_save_regs,
+                                 const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+
+  // Increase frame to required size.
+  IncreaseFrameSize(frame_size);
+
+  // Push callee saves and return address
+  int stack_offset = frame_size - kFramePointerSize;
+  StoreToOffset(kStoreDoubleword, RA, SP, stack_offset);
+  for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
+    stack_offset -= kFramePointerSize;
+    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
+  }
+
+  // Write out Method*.
+  StoreToOffset(kStoreWord, method_reg.AsMips64().AsGpuRegister(), SP, 0);
+
+  // Write out entry spills.
+  int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>);
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    Mips64ManagedRegister reg = entry_spills.at(i).AsMips64();
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    int32_t size = spill.getSize();
+    if (reg.IsNoRegister()) {
+      // only increment stack offset.
+      offset += size;
+    } else if (reg.IsFpuRegister()) {
+      StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsFpuRegister(), SP, offset);
+      offset += size;
+    } else if (reg.IsGpuRegister()) {
+      StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsGpuRegister(), SP, offset);
+      offset += size;
+    }
+  }
+}
+
+void Mips64Assembler::RemoveFrame(size_t frame_size,
+                                  const std::vector<ManagedRegister>& callee_save_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+
+  // Pop callee saves and return address
+  int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
+  for (size_t i = 0; i < callee_save_regs.size(); ++i) {
+    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
+    stack_offset += kFramePointerSize;
+  }
+  LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset);
+
+  // Decrease frame to required size.
+  DecreaseFrameSize(frame_size);
+
+  // Then jump to the return address.
+  Jr(RA);
+}
+
+void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant64(SP, SP, -adjust);
+}
+
+void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant64(SP, SP, adjust);
+}
+
+void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
+  Mips64ManagedRegister src = msrc.AsMips64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsGpuRegister()) {
+    CHECK(size == 4 || size == 8) << size;
+    if (size == 8) {
+      StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value());
+    } else if (size == 4) {
+      StoreToOffset(kStoreWord, src.AsGpuRegister(), SP, dest.Int32Value());
+    } else {
+      UNIMPLEMENTED(FATAL) << "We only support Store() of size 4 and 8";
+    }
+  } else if (src.IsFpuRegister()) {
+    CHECK(size == 4 || size == 8) << size;
+    if (size == 8) {
+      StoreFpuToOffset(kStoreDoubleword, src.AsFpuRegister(), SP, dest.Int32Value());
+    } else if (size == 4) {
+      StoreFpuToOffset(kStoreWord, src.AsFpuRegister(), SP, dest.Int32Value());
+    } else {
+      UNIMPLEMENTED(FATAL) << "We only support Store() of size 4 and 8";
+    }
+  }
+}
+
+void Mips64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  Mips64ManagedRegister src = msrc.AsMips64();
+  CHECK(src.IsGpuRegister());
+  StoreToOffset(kStoreWord, src.AsGpuRegister(), SP, dest.Int32Value());
+}
+
+void Mips64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  Mips64ManagedRegister src = msrc.AsMips64();
+  CHECK(src.IsGpuRegister());
+  StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value());
+}
+
+void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
+                                            ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadImmediate64(scratch.AsGpuRegister(), imm);
+  StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
+}
+
+void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
+                                               ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadImmediate64(scratch.AsGpuRegister(), imm);
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
+}
+
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
+                                                 FrameOffset fr_offs,
+                                                 ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  AddConstant64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
+}
+
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
+  StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
+}
+
+void Mips64Assembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc,
+                                    FrameOffset in_off, ManagedRegister mscratch) {
+  Mips64ManagedRegister src = msrc.AsMips64();
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value());
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), SP, in_off.Int32Value());
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, dest.Int32Value() + 8);
+}
+
+void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  return EmitLoad(mdest, SP, src.Int32Value(), size);
+}
+
+void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
+  return EmitLoad(mdest, S1, src.Int32Value(), size);
+}
+
+void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  Mips64ManagedRegister dest = mdest.AsMips64();
+  CHECK(dest.IsGpuRegister());
+  LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value());
+}
+
+void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base,
+                            MemberOffset offs) {
+  Mips64ManagedRegister dest = mdest.AsMips64();
+  CHECK(dest.IsGpuRegister() && dest.IsGpuRegister());
+  LoadFromOffset(kLoadWord, dest.AsGpuRegister(),
+                 base.AsMips64().AsGpuRegister(), offs.Int32Value());
+  if (kPoisonHeapReferences) {
+    Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
+  }
+}
+
+void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
+                               Offset offs) {
+  Mips64ManagedRegister dest = mdest.AsMips64();
+  CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()) << dest;
+  LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(),
+                 base.AsMips64().AsGpuRegister(), offs.Int32Value());
+}
+
+void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
+                                         ThreadOffset<8> offs) {
+  Mips64ManagedRegister dest = mdest.AsMips64();
+  CHECK(dest.IsGpuRegister());
+  LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
+}
+
+void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips";
+}
+
+void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
+}
+
+void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  Mips64ManagedRegister dest = mdest.AsMips64();
+  Mips64ManagedRegister src = msrc.AsMips64();
+  if (!dest.Equals(src)) {
+    if (dest.IsGpuRegister()) {
+      CHECK(src.IsGpuRegister()) << src;
+      Move(dest.AsGpuRegister(), src.AsGpuRegister());
+    } else if (dest.IsFpuRegister()) {
+      CHECK(src.IsFpuRegister()) << src;
+      if (size == 4) {
+        MovS(dest.AsFpuRegister(), src.AsFpuRegister());
+      } else if (size == 8) {
+        MovD(dest.AsFpuRegister(), src.AsFpuRegister());
+      } else {
+        UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+      }
+    }
+  }
+}
+
+void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
+                              ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, src.Int32Value());
+  StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
+}
+
+void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
+                                             ThreadOffset<8> thr_offs,
+                                             ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
+}
+
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
+                                           FrameOffset fr_offs,
+                                           ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 SP, fr_offs.Int32Value());
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(),
+                S1, thr_offs.Int32Value());
+}
+
+void Mips64Assembler::Copy(FrameOffset dest, FrameOffset src,
+                           ManagedRegister mscratch, size_t size) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, src.Int32Value());
+    StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), SP, src.Int32Value());
+    StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Mips64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                         ManagedRegister mscratch, size_t size) {
+  GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadFromOffset(kLoadWord, scratch, src_base.AsMips64().AsGpuRegister(),
+                   src_offset.Int32Value());
+    StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(kLoadDoubleword, scratch, src_base.AsMips64().AsGpuRegister(),
+                   src_offset.Int32Value());
+    StoreToOffset(kStoreDoubleword, scratch, SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
+                         ManagedRegister mscratch, size_t size) {
+  GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
+    StoreToOffset(kStoreWord, scratch, dest_base.AsMips64().AsGpuRegister(),
+                  dest_offset.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(kLoadDoubleword, scratch, SP, src.Int32Value());
+    StoreToOffset(kStoreDoubleword, scratch, dest_base.AsMips64().AsGpuRegister(),
+                  dest_offset.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
+                         ManagedRegister /*mscratch*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+}
+
+void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
+                         ManagedRegister src, Offset src_offset,
+                         ManagedRegister mscratch, size_t size) {
+  GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadFromOffset(kLoadWord, scratch, src.AsMips64().AsGpuRegister(), src_offset.Int32Value());
+    StoreToOffset(kStoreWord, scratch, dest.AsMips64().AsGpuRegister(), dest_offset.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(kLoadDoubleword, scratch, src.AsMips64().AsGpuRegister(),
+                   src_offset.Int32Value());
+    StoreToOffset(kStoreDoubleword, scratch, dest.AsMips64().AsGpuRegister(),
+                  dest_offset.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset
+/*src_offset*/,
+                         ManagedRegister /*mscratch*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+}
+
+void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+}
+
+void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                    FrameOffset handle_scope_offset,
+                                    ManagedRegister min_reg, bool null_allowed) {
+  Mips64ManagedRegister out_reg = mout_reg.AsMips64();
+  Mips64ManagedRegister in_reg = min_reg.AsMips64();
+  CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
+  CHECK(out_reg.IsGpuRegister()) << out_reg;
+  if (null_allowed) {
+    Label null_arg;
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(),
+                     SP, handle_scope_offset.Int32Value());
+      in_reg = out_reg;
+    }
+    if (!out_reg.Equals(in_reg)) {
+      LoadImmediate64(out_reg.AsGpuRegister(), 0);
+    }
+    EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true);
+    AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg, false);
+  } else {
+    AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+  }
+}
+
+void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                    FrameOffset handle_scope_offset,
+                                    ManagedRegister mscratch,
+                                    bool null_allowed) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  if (null_allowed) {
+    Label null_arg;
+    LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP,
+                   handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    EmitBranch(scratch.AsGpuRegister(), ZERO, &null_arg, true);
+    AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg, false);
+  } else {
+    AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+  }
+  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, out_off.Int32Value());
+}
+
+// Given a handle scope entry, load the associated reference.
+void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                          ManagedRegister min_reg) {
+  Mips64ManagedRegister out_reg = mout_reg.AsMips64();
+  Mips64ManagedRegister in_reg = min_reg.AsMips64();
+  CHECK(out_reg.IsGpuRegister()) << out_reg;
+  CHECK(in_reg.IsGpuRegister()) << in_reg;
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    LoadImmediate64(out_reg.AsGpuRegister(), 0);
+  }
+  EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true);
+  LoadFromOffset(kLoadDoubleword, out_reg.AsGpuRegister(),
+                 in_reg.AsGpuRegister(), 0);
+  Bind(&null_arg, false);
+}
+
+void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
+  Mips64ManagedRegister base = mbase.AsMips64();
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(base.IsGpuRegister()) << base;
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 base.AsGpuRegister(), offset.Int32Value());
+  Jalr(scratch.AsGpuRegister());
+  // TODO: place reference map on call
+}
+
+void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  LoadFromOffset(kLoadWord, scratch.AsGpuRegister(),
+                 SP, base.Int32Value());
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 scratch.AsGpuRegister(), offset.Int32Value());
+  Jalr(scratch.AsGpuRegister());
+  // TODO: place reference map on call
+}
+
+void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
+  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+}
+
+void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
+  Move(tr.AsMips64().AsGpuRegister(), S1);
+}
+
+void Mips64Assembler::GetCurrentThread(FrameOffset offset,
+                                     ManagedRegister /*mscratch*/) {
+  StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
+}
+
+void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
+  buffer_.EnqueueSlowPath(slow);
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 S1, Thread::ExceptionOffset<8>().Int32Value());
+  EmitBranch(scratch.AsGpuRegister(), ZERO, slow->Entry(), false);
+}
+
+void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
+  Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_, false);
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    __ DecreaseFrameSize(stack_adjust_);
+  }
+  // Pass exception object as argument
+  // Don't care about preserving A0 as this call won't return
+  __ Move(A0, scratch_.AsGpuRegister());
+  // Set up call to Thread::Current()->pDeliverException
+  __ LoadFromOffset(kLoadDoubleword, T9, S1,
+                    QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
+  __ Jr(T9);
+  // Call never returns
+  __ Break();
+#undef __
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
new file mode 100644
index 0000000000..36e74d7cb2
--- /dev/null
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
+#define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "constants_mips64.h"
+#include "globals.h"
+#include "managed_register_mips64.h"
+#include "utils/assembler.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace mips64 {
+
+enum LoadOperandType {
+  kLoadSignedByte,
+  kLoadUnsignedByte,
+  kLoadSignedHalfword,
+  kLoadUnsignedHalfword,
+  kLoadWord,
+  kLoadDoubleword
+};
+
+enum StoreOperandType {
+  kStoreByte,
+  kStoreHalfword,
+  kStoreWord,
+  kStoreDoubleword
+};
+
+class Mips64Assembler FINAL : public Assembler {
+ public:
+  Mips64Assembler() {}
+  virtual ~Mips64Assembler() {}
+
+  // Emit Machine Instructions.
+  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Mult(GpuRegister rs, GpuRegister rt);
+  void Multu(GpuRegister rs, GpuRegister rt);
+  void Div(GpuRegister rs, GpuRegister rt);
+  void Divu(GpuRegister rs, GpuRegister rt);
+
+  void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Or(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  void Sll(GpuRegister rd, GpuRegister rs, int shamt);
+  void Srl(GpuRegister rd, GpuRegister rs, int shamt);
+  void Sra(GpuRegister rd, GpuRegister rs, int shamt);
+  void Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lui(GpuRegister rt, uint16_t imm16);
+  void Mfhi(GpuRegister rd);
+  void Mflo(GpuRegister rd);
+
+  void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+
+  void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+
+  void Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void J(uint32_t address);
+  void Jal(uint32_t address);
+  void Jr(GpuRegister rs);
+  void Jalr(GpuRegister rs);
+
+  void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MovS(FpuRegister fd, FpuRegister fs);
+  void MovD(FpuRegister fd, FpuRegister fs);
+
+  void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mtc1(FpuRegister ft, GpuRegister rs);
+  void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+
+  void Break();
+  void Nop();
+  void Move(GpuRegister rt, GpuRegister rs);
+  void Clear(GpuRegister rt);
+  void Not(GpuRegister rt, GpuRegister rs);
+  void Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Div(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  void AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value);
+  void LoadImmediate64(GpuRegister rt, int32_t value);
+
+  void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
+  void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
+  void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
+  void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
+  void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
+
+  // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
+  void Emit(int32_t value);
+  void EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal);
+  void EmitJump(Label* label, bool link);
+  void Bind(Label* label, bool is_jump);
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                  const std::vector<ManagedRegister>& callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size,
+                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
+
+  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
+                                ManagedRegister mscratch) OVERRIDE;
+
+  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+                                  ManagedRegister mscratch) OVERRIDE;
+
+  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
+                     ManagedRegister mscratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
+
+  void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
+                              ManagedRegister mscratch) OVERRIDE;
+
+  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+                            ManagedRegister mscratch) OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
+
+  // Set up out_reg to hold a Object** into the handle scope, or to be NULL if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // NULL.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold a Object** into the handle scope, or to be NULL if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
+                              mscratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**) load this into dst
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to a ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
+  void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
+  void EmitJ(int opcode, int address);
+  void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
+  void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+
+  int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump);
+  int DecodeBranchOffset(int32_t inst, bool is_jump);
+
+  DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
+};
+
+// Slowpath entered when Thread::Current()->_exception is non-null
+class Mips64ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const Mips64ManagedRegister scratch_;
+  const size_t stack_adjust_;
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
new file mode 100644
index 0000000000..8b7697cac3
--- /dev/null
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
+#define ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
+
+#include <iosfwd>
+
+#include "arch/mips64/registers_mips64.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "globals.h"
+
+namespace art {
+namespace mips64 {
+
+// Constants used for the decoding or encoding of the individual fields of instructions.
+enum InstructionFields {
+  kOpcodeShift = 26,
+  kOpcodeBits = 6,
+  kRsShift = 21,
+  kRsBits = 5,
+  kRtShift = 16,
+  kRtBits = 5,
+  kRdShift = 11,
+  kRdBits = 5,
+  kShamtShift = 6,
+  kShamtBits = 5,
+  kFunctShift = 0,
+  kFunctBits = 6,
+
+  kFmtShift = 21,
+  kFmtBits = 5,
+  kFtShift = 16,
+  kFtBits = 5,
+  kFsShift = 11,
+  kFsBits = 5,
+  kFdShift = 6,
+  kFdBits = 5,
+
+  kBranchOffsetMask = 0x0000ffff,
+  kJumpOffsetMask = 0x03ffffff,
+};
+
+enum ScaleFactor {
+  TIMES_1 = 0,
+  TIMES_2 = 1,
+  TIMES_4 = 2,
+  TIMES_8 = 3
+};
+
+class Instr {
+ public:
+  static const uint32_t kBreakPointInstruction = 0x0000000D;
+
+  bool IsBreakPoint() {
+    return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC0000CF) == kBreakPointInstruction;
+  }
+
+  // Instructions are read out of a code stream. The only way to get a
+  // reference to an instruction is to convert a pointer. There is no way
+  // to allocate or create instances of class Instr.
+  // Use the At(pc) function to create references to Instr.
+  static Instr* At(uintptr_t pc) { return reinterpret_cast<Instr*>(pc); }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Instr);
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
new file mode 100644
index 0000000000..dea396e4a7
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+
+#include "globals.h"
+
+namespace art {
+namespace mips64 {
+
+bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const {
+  if (IsNoRegister() || other.IsNoRegister()) return false;
+  CHECK(IsValidManagedRegister());
+  CHECK(other.IsValidManagedRegister());
+  if (Equals(other)) return true;
+  return false;
+}
+
+void Mips64ManagedRegister::Print(std::ostream& os) const {
+  if (!IsValidManagedRegister()) {
+    os << "No Register";
+  } else if (IsGpuRegister()) {
+    os << "GPU: " << static_cast<int>(AsGpuRegister());
+  } else if (IsFpuRegister()) {
+     os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+  } else {
+    os << "??: " << RegId();
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const Mips64ManagedRegister& reg) {
+  reg.Print(os);
+  return os;
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
new file mode 100644
index 0000000000..924a928389
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
+#define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
+
+#include "constants_mips64.h"
+#include "utils/managed_register.h"
+
+namespace art {
+namespace mips64 {
+
+const int kNumberOfGpuRegIds = kNumberOfGpuRegisters;
+const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters;
+
+const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
+const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;
+
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
+
+// An instance of class 'ManagedRegister' represents a single GPU register (enum
+// Register) or a double precision FP register (enum FpuRegister)
+// 'ManagedRegister::NoRegister()' provides an invalid register.
+// There is a one-to-one mapping between ManagedRegister and register id.
+class Mips64ManagedRegister : public ManagedRegister {
+ public:
+  GpuRegister AsGpuRegister() const {
+    CHECK(IsGpuRegister());
+    return static_cast<GpuRegister>(id_);
+  }
+
+  FpuRegister AsFpuRegister() const {
+    CHECK(IsFpuRegister());
+    return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
+  }
+
+  bool IsGpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
+  }
+
+  bool IsFpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - kNumberOfGpuRegIds;
+    return (0 <= test) && (test < kNumberOfFpuRegIds);
+  }
+
+  void Print(std::ostream& os) const;
+
+  // Returns true if the two managed-registers ('this' and 'other') overlap.
+  // Either managed-register may be the NoRegister. If both are the NoRegister
+  // then false is returned.
+  bool Overlaps(const Mips64ManagedRegister& other) const;
+
+  static Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
+    CHECK_NE(r, kNoGpuRegister);
+    return FromRegId(r);
+  }
+
+  static Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
+    CHECK_NE(r, kNoFpuRegister);
+    return FromRegId(r + kNumberOfGpuRegIds);
+  }
+
+ private:
+  bool IsValidManagedRegister() const {
+    return (0 <= id_) && (id_ < kNumberOfRegIds);
+  }
+
+  int RegId() const {
+    CHECK(!IsNoRegister());
+    return id_;
+  }
+
+  int AllocId() const {
+    CHECK(IsValidManagedRegister());
+    CHECK_LT(id_, kNumberOfAllocIds);
+    return id_;
+  }
+
+  int AllocIdLow() const;
+  int AllocIdHigh() const;
+
+  friend class ManagedRegister;
+
+  explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+
+  static Mips64ManagedRegister FromRegId(int reg_id) {
+    Mips64ManagedRegister reg(reg_id);
+    CHECK(reg.IsValidManagedRegister());
+    return reg;
+  }
+};
+
+std::ostream& operator<<(std::ostream& os, const Mips64ManagedRegister& reg);
+
+}  // namespace mips64
+
+inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
+  mips64::Mips64ManagedRegister reg(id_);
+  CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
+  return reg;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d843a7213f..c7414a12fc 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -184,6 +184,20 @@ void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
   EmitImmediate(imm);
 }
 
+
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
+  cmov(c, dst, src, true);
+}
+
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + c);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+
 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalByteRegNormalizingRex32(dst, src);
@@ -369,19 +383,26 @@ void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
 
 
 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
+  movd(dst, src, true);
+}
+
+void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
+  movd(dst, src, true);
+}
+
+void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitRex64(dst, src);
+  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
   EmitUint8(0x0F);
   EmitUint8(0x6E);
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
-
-void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
+void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitRex64(src, dst);
+  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
   EmitUint8(0x0F);
   EmitUint8(0x7E);
   EmitOperand(src.LowBits(), Operand(dst));
@@ -826,6 +847,39 @@ void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x54);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x54);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x56);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x56);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
 
 void X86_64Assembler::fldl(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1757,6 +1811,20 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
   EmitUint8(0xC0 + dst.LowBits());
 }
 
+void X86_64Assembler::bswapl(CpuRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0xC8 + dst.LowBits());
+}
+
+void X86_64Assembler::bswapq(CpuRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0xC8 + dst.LowBits());
+}
+
 
 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index ac8bc9ab49..5c8d608cf2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -276,6 +276,9 @@ class X86_64Assembler FINAL : public Assembler {
   void movl(const Address& dst, CpuRegister src);
   void movl(const Address& dst, const Immediate& imm);
 
+  void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
+  void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
+
   void movzxb(CpuRegister dst, CpuRegister src);
   void movzxb(CpuRegister dst, const Address& src);
   void movsxb(CpuRegister dst, CpuRegister src);
@@ -303,8 +306,10 @@ class X86_64Assembler FINAL : public Assembler {
   void movsxd(CpuRegister dst, CpuRegister src);
   void movsxd(CpuRegister dst, const Address& src);
 
-  void movd(XmmRegister dst, CpuRegister src);
-  void movd(CpuRegister dst, XmmRegister src);
+  void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
+  void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
+  void movd(XmmRegister dst, CpuRegister src, bool is64bit);
+  void movd(CpuRegister dst, XmmRegister src, bool is64bit);
 
   void addss(XmmRegister dst, XmmRegister src);
   void addss(XmmRegister dst, const Address& src);
@@ -360,6 +365,11 @@ class X86_64Assembler FINAL : public Assembler {
   void xorps(XmmRegister dst, XmmRegister src);
 
   void andpd(XmmRegister dst, const Address& src);
+  void andpd(XmmRegister dst, XmmRegister src);
+  void andps(XmmRegister dst, XmmRegister src);
+
+  void orpd(XmmRegister dst, XmmRegister src);
+  void orps(XmmRegister dst, XmmRegister src);
 
   void flds(const Address& src);
   void fstps(const Address& dst);
@@ -504,6 +514,9 @@ class X86_64Assembler FINAL : public Assembler {
 
   void setcc(Condition condition, CpuRegister dst);
 
+  void bswapl(CpuRegister dst);
+  void bswapq(CpuRegister dst);
+
   //
   // Macros for High-level operations.
   //
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index b8d724d771..6df4144004 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -330,7 +330,6 @@ std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Ass
     assembler->shlq(*reg, shifter);
     str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
   }
-  printf("%s\n", str.str().c_str());
 
   return str.str();
 }
@@ -690,6 +689,22 @@ TEST_F(AssemblerX86_64Test, Xorpd) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
 }
 
+TEST_F(AssemblerX86_64Test, Andps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andps, "andps %{reg2}, %{reg1}"), "andps");
+}
+
+TEST_F(AssemblerX86_64Test, Andpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd");
+}
+
+TEST_F(AssemblerX86_64Test, Orps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
+}
+
+TEST_F(AssemblerX86_64Test, Orpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
+}
+
 // X87
 
 std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
@@ -758,6 +773,14 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) {
 // MISC //
 //////////
 
+TEST_F(AssemblerX86_64Test, Bswapl) {
+  DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl");
+}
+
+TEST_F(AssemblerX86_64Test, Bswapq) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq");
+}
+
 std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
                           x86_64::X86_64Assembler* assembler) {
   // From Condition