104 files changed, 2571 insertions, 1219 deletions
diff --git a/Android.mk b/Android.mk index f7f65acd98..593ee0422e 100644 --- a/Android.mk +++ b/Android.mk @@ -189,6 +189,13 @@ test-art-host-oat-interpreter: $(ART_TEST_HOST_OAT_INTERPRETER_TARGETS) test-art-host-oat: test-art-host-oat-default test-art-host-oat-interpreter @echo test-art-host-oat PASSED +FAILING_OPTIMIZING_MESSAGE := failed with the optimizing compiler. If the test passes \ + with Quick and interpreter, it is probably just a bug in the optimizing compiler. Please \ + add the test name to the FAILING_OPTIMIZING_TESTS Makefile variable in art/Android.mk, \ + and file a bug. + +# Placeholder for failing tests on the optimizing compiler. + define declare-test-art-host-run-test .PHONY: test-art-host-run-test-default-$(1) test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin @@ -197,6 +204,14 @@ test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1) +.PHONY: test-art-host-run-test-optimizing-$(1) +test-art-host-run-test-optimizing-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin + DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test -Xcompiler-option --compiler-backend=Optimizing $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host $(1) \ + || (echo -e "\x1b[31;01mTest $(1) $(FAILING_OPTIMIZING_MESSAGE)\x1b[0m" && exit 1) + @echo test-art-host-run-test-optimizing-$(1) PASSED + +TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS += test-art-host-run-test-optimizing-$(1) + .PHONY: test-art-host-run-test-interpreter-$(1) test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host --interpreter $(1) @@ -205,7 +220,7 @@ test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1) .PHONY: test-art-host-run-test-$(1) -test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1) +test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1) test-art-host-run-test-optimizing-$(1) endef @@ -215,12 +230,21 @@ $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call declare-test-art-host-run-t test-art-host-run-test-default: $(TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS) @echo test-art-host-run-test-default PASSED +FAILING_OPTIMIZING_TESTS := +$(foreach test, $(FAILING_OPTIMIZING_TESTS), \ + $(eval TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS := $(filter-out test-art-host-run-test-optimizing-$(test), $(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS)))) + +.PHONY: test-art-host-run-test-optimizing +test-art-host-run-test-optimizing: $(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS) + $(foreach test, $(FAILING_OPTIMIZING_TESTS), $(info Optimizing compiler has skipped $(test))) + @echo test-art-host-run-test-optimizing PASSED + .PHONY: test-art-host-run-test-interpreter test-art-host-run-test-interpreter: $(TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS) @echo test-art-host-run-test-interpreter PASSED .PHONY: test-art-host-run-test -test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter +test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter test-art-host-run-test-optimizing 
@echo test-art-host-run-test PASSED ######################################################################## diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index 06e259a65f..920cde28aa 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -54,9 +54,9 @@ bool BBCombine::Worker(const PassDataHolder* data) const { /* * BasicBlock Optimization pass implementation start. */ -void BBOptimizations::Start(const PassDataHolder* data) const { +void BBOptimizations::Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); /* * This pass has a different ordering depEnding on the suppress exception, diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 00947902e7..2b097b5c73 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -31,9 +31,9 @@ class CacheFieldLoweringInfo : public PassME { CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheFieldLoweringInfo(); } @@ -55,9 +55,9 @@ class CacheMethodLoweringInfo : public PassME { CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheMethodLoweringInfo(); } @@ -86,9 +86,9 @@ class CallInlining : public PassME { return cUnit->mir_graph->InlineCallsGate(); } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsStart(); } @@ -105,9 +105,9 @@ class CallInlining : public PassME { return false; } - void End(const PassDataHolder* data) const { + void End(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsEnd(); } @@ -122,9 +122,9 @@ class CodeLayout : public PassME { CodeLayout() : PassME("CodeLayout", kAllNodes, kOptimizationBasicBlockChange, "2_post_layout_cfg") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->VerifyDataflow(); } @@ -142,9 +142,9 @@ class NullCheckEliminationAndTypeInference : public PassME { : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* 
data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesStart(); } @@ -159,9 +159,9 @@ class NullCheckEliminationAndTypeInference : public PassME { return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb); } - void End(const PassDataHolder* data) const { + void End(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd(); } @@ -189,9 +189,9 @@ class ClassInitCheckElimination : public PassME { return cUnit->mir_graph->EliminateClassInitChecks(bb); } - void End(const PassDataHolder* data) const { + void End(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateClassInitChecksEnd(); } @@ -232,7 +232,7 @@ class BBOptimizations : public PassME { return ((cUnit->disable_opt & (1 << kBBOpt)) == 0); } - void Start(const PassDataHolder* data) const; + void Start(PassDataHolder* data) const; }; } // namespace art diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 55a4c7815e..de9ac4bd01 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -50,6 +50,8 @@ enum SpecialTargetRegister { kArg3, kArg4, kArg5, + kArg6, + kArg7, kFArg0, kFArg1, kFArg2, diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 035bd66223..b8d190a9eb 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -165,7 +165,7 @@ int arm64_support_list[] = { Instruction::CONST_STRING, Instruction::MONITOR_ENTER, Instruction::MONITOR_EXIT, - Instruction::THROW, + // Instruction::THROW, Instruction::GOTO, Instruction::GOTO_16, Instruction::GOTO_32, @@ -295,12 +295,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG, Instruction::SHR_LONG, Instruction::USHR_LONG, - Instruction::REM_FLOAT, + // Instruction::REM_FLOAT, Instruction::ADD_DOUBLE, Instruction::SUB_DOUBLE, Instruction::MUL_DOUBLE, Instruction::DIV_DOUBLE, - Instruction::REM_DOUBLE, + // Instruction::REM_DOUBLE, Instruction::ADD_LONG_2ADDR, Instruction::SUB_LONG_2ADDR, Instruction::MUL_LONG_2ADDR, @@ -312,12 +312,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG_2ADDR, Instruction::SHR_LONG_2ADDR, Instruction::USHR_LONG_2ADDR, - Instruction::REM_FLOAT_2ADDR, + // Instruction::REM_FLOAT_2ADDR, Instruction::ADD_DOUBLE_2ADDR, Instruction::SUB_DOUBLE_2ADDR, Instruction::MUL_DOUBLE_2ADDR, Instruction::DIV_DOUBLE_2ADDR, - Instruction::REM_DOUBLE_2ADDR, + // Instruction::REM_DOUBLE_2ADDR, // TODO(Arm64): Enable compiler pass // ----- ExtendedMIROpcode ----- kMirOpPhi, @@ -336,54 +336,54 @@ int arm64_support_list[] = { kMirOpSelect, #if ARM64_USE_EXPERIMENTAL_OPCODES - // Instruction::MOVE_RESULT, - // Instruction::MOVE_RESULT_WIDE, - // Instruction::MOVE_RESULT_OBJECT, - // Instruction::CONST_STRING_JUMBO, - // Instruction::CONST_CLASS, - // Instruction::CHECK_CAST, - // Instruction::INSTANCE_OF, - // Instruction::ARRAY_LENGTH, - // Instruction::NEW_INSTANCE, - // Instruction::NEW_ARRAY, - // Instruction::FILLED_NEW_ARRAY, - // 
Instruction::FILLED_NEW_ARRAY_RANGE, - // Instruction::FILL_ARRAY_DATA, + Instruction::MOVE_RESULT, + Instruction::MOVE_RESULT_WIDE, + Instruction::MOVE_RESULT_OBJECT, + Instruction::CONST_STRING_JUMBO, + Instruction::CONST_CLASS, + Instruction::CHECK_CAST, + Instruction::INSTANCE_OF, + Instruction::ARRAY_LENGTH, + Instruction::NEW_INSTANCE, + Instruction::NEW_ARRAY, + Instruction::FILLED_NEW_ARRAY, + Instruction::FILLED_NEW_ARRAY_RANGE, + Instruction::FILL_ARRAY_DATA, // Instruction::UNUSED_3E, // Instruction::UNUSED_3F, // Instruction::UNUSED_40, // Instruction::UNUSED_41, // Instruction::UNUSED_42, // Instruction::UNUSED_43, - // Instruction::AGET, - // Instruction::AGET_WIDE, - // Instruction::AGET_OBJECT, - // Instruction::AGET_BOOLEAN, - // Instruction::AGET_BYTE, - // Instruction::AGET_CHAR, - // Instruction::AGET_SHORT, - // Instruction::APUT, - // Instruction::APUT_WIDE, - // Instruction::APUT_OBJECT, - // Instruction::APUT_BOOLEAN, - // Instruction::APUT_BYTE, - // Instruction::APUT_CHAR, - // Instruction::APUT_SHORT, - // Instruction::IPUT_WIDE, - // Instruction::IGET_WIDE, - // Instruction::SGET_WIDE, - // Instruction::SPUT_WIDE, + Instruction::AGET, + Instruction::AGET_WIDE, + Instruction::AGET_OBJECT, + Instruction::AGET_BOOLEAN, + Instruction::AGET_BYTE, + Instruction::AGET_CHAR, + Instruction::AGET_SHORT, + Instruction::APUT, + Instruction::APUT_WIDE, + Instruction::APUT_OBJECT, + Instruction::APUT_BOOLEAN, + Instruction::APUT_BYTE, + Instruction::APUT_CHAR, + Instruction::APUT_SHORT, + Instruction::IPUT_WIDE, + Instruction::IGET_WIDE, + Instruction::SGET_WIDE, + Instruction::SPUT_WIDE, Instruction::INVOKE_VIRTUAL, Instruction::INVOKE_SUPER, Instruction::INVOKE_DIRECT, Instruction::INVOKE_STATIC, Instruction::INVOKE_INTERFACE, - // Instruction::RETURN_VOID_BARRIER, - // Instruction::INVOKE_VIRTUAL_RANGE, - // Instruction::INVOKE_SUPER_RANGE, - // Instruction::INVOKE_DIRECT_RANGE, - // Instruction::INVOKE_STATIC_RANGE, - // Instruction::INVOKE_INTERFACE_RANGE, + Instruction::RETURN_VOID_BARRIER, + Instruction::INVOKE_VIRTUAL_RANGE, + Instruction::INVOKE_SUPER_RANGE, + Instruction::INVOKE_DIRECT_RANGE, + Instruction::INVOKE_STATIC_RANGE, + Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, // Instruction::IGET_QUICK, @@ -708,26 +708,15 @@ int x86_64_support_list[] = { // (ARM64) Current calling conversion only support 32bit softfp // which has problems with long, float, double constexpr char arm64_supported_types[] = "ZBSCILVJFD"; -// (x84_64) We still have troubles with compiling longs/doubles/floats constexpr char x86_64_supported_types[] = "ZBSCILVJFD"; // TODO: Remove this when we are able to compile everything. static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) { uint32_t shorty_size = strlen(shorty); CHECK_GE(shorty_size, 1u); - // Set a limitation on maximum number of parameters. - // Note : there is an implied "method*" parameter, and probably "this" as well. - // 1 is for the return type. Currently, we only accept 2 parameters at the most. - // (x86_64): For now we have the same limitation. But we might want to split this - // check in future into two separate cases for arm64 and x86_64. - if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) { - return false; - } - const char* supported_types = arm64_supported_types; - if (instruction_set == kX86_64) { - supported_types = x86_64_supported_types; - } + const char* supported_types = + (instruction_set == kX86_64) ? 
x86_64_supported_types : arm64_supported_types; for (uint32_t i = 0; i < shorty_size; i++) { if (strchr(supported_types, shorty[i]) == nullptr) { return false; @@ -741,7 +730,7 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit& cu) { // There is some limitation with current ARM 64 backend. - if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) { + if (cu.instruction_set == kArm64) { // Check if we can compile the prototype. const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); if (!CanCompileShorty(shorty, cu.instruction_set)) { @@ -756,7 +745,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, support_list_size = arraysize(x86_64_support_list); } - for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) { + for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) { BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx); if (bb == NULL) continue; if (bb->block_type == kDead) continue; @@ -792,9 +781,6 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, } } } - - LOG(INFO) << "Using experimental instruction set A64 for " - << PrettyMethod(method_idx, dex_file); } return true; } @@ -891,7 +877,6 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } else if (cu.instruction_set == kArm64) { // TODO(Arm64): enable optimizations once backend is mature enough. cu.disable_opt = ~(uint32_t)0; - cu.enable_debug |= (1 << kDebugCodegenDump); } cu.StartTimingSplit("BuildMIRGraph"); @@ -929,7 +914,8 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.NewTimingSplit("MIROpt:CheckFilters"); if (cu.mir_graph->SkipCompilation()) { - return NULL; + VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file); + return nullptr; } /* Create the pass driver and launch it */ @@ -958,6 +944,10 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, CompiledMethod* result = NULL; + if (cu.mir_graph->PuntToInterpreter()) { + return NULL; + } + cu.cg->Materialize(); cu.NewTimingSplit("Dedupe"); /* deduping takes up the vast majority of time in GetCompiledMethod(). */ diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 2ec17dedaf..1350665097 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -1011,7 +1011,7 @@ bool MIRGraph::SkipCompilation() { } // Contains a pattern we don't want to compile? - if (punt_to_interpreter_) { + if (PuntToInterpreter()) { return true; } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index a2676c82ca..63a55707e5 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -586,7 +586,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ if (current_method_ == 0) { DCHECK(entry_block_ == NULL); DCHECK(exit_block_ == NULL); - DCHECK_EQ(num_blocks_, 0); + DCHECK_EQ(num_blocks_, 0U); // Use id 0 to represent a null block. 
BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++); DCHECK_EQ(null_block->id, NullBasicBlockId); diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index b6cec662c3..15c0aa4eaf 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -587,7 +587,7 @@ class MIRGraph { return m_units_[m_unit_index]->GetCodeItem()->insns_; } - int GetNumBlocks() const { + unsigned int GetNumBlocks() const { return num_blocks_; } @@ -607,7 +607,7 @@ class MIRGraph { return exit_block_; } - BasicBlock* GetBasicBlock(int block_id) const { + BasicBlock* GetBasicBlock(unsigned int block_id) const { return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id); } @@ -958,6 +958,14 @@ class MIRGraph { bool SetHigh(int index, bool is_high); bool SetHigh(int index); + bool PuntToInterpreter() { + return punt_to_interpreter_; + } + + void SetPuntToInterpreter(bool val) { + punt_to_interpreter_ = val; + } + char* GetDalvikDisassembly(const MIR* mir); void ReplaceSpecialChars(std::string& str); std::string GetSSAName(int ssa_reg); @@ -1149,7 +1157,7 @@ class MIRGraph { ArenaBitVector* try_block_addr_; BasicBlock* entry_block_; BasicBlock* exit_block_; - int num_blocks_; + unsigned int num_blocks_; const DexFile::CodeItem* current_code_item_; GrowableArray<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. std::vector<DexCompilationUnit*> m_units_; // List of methods included in this graph diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h index b4906d67df..dbb5366af6 100644 --- a/compiler/dex/pass.h +++ b/compiler/dex/pass.h @@ -64,7 +64,7 @@ class Pass { /** * @brief Start of the pass: called before the Worker function. */ - virtual void Start(const PassDataHolder* data) const { + virtual void Start(PassDataHolder* data) const { // Unused parameter. UNUSED(data); } @@ -72,7 +72,7 @@ class Pass { /** * @brief End of the pass: called after the WalkBasicBlocks function. */ - virtual void End(const PassDataHolder* data) const { + virtual void End(PassDataHolder* data) const { // Unused parameter. UNUSED(data); } diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h index 9efd5aeb40..ff698654cf 100644 --- a/compiler/dex/pass_me.h +++ b/compiler/dex/pass_me.h @@ -42,6 +42,7 @@ class PassMEDataHolder: public PassDataHolder { public: CompilationUnit* c_unit; BasicBlock* bb; + void* data; }; enum DataFlowAnalysisMode { diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc index 58700a4bd3..1371652984 100644 --- a/compiler/dex/post_opt_passes.cc +++ b/compiler/dex/post_opt_passes.cc @@ -74,9 +74,9 @@ bool ClearPhiInstructions::Worker(const PassDataHolder* data) const { return false; } -void CalculatePredecessors::Start(const PassDataHolder* data) const { +void CalculatePredecessors::Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); // First get the MIRGraph here to factorize a bit the code. 
MIRGraph *mir_graph = c_unit->mir_graph.get(); diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h index f2035052c9..445c46d038 100644 --- a/compiler/dex/post_opt_passes.h +++ b/compiler/dex/post_opt_passes.h @@ -32,11 +32,11 @@ class InitializeData : public PassME { InitializeData() : PassME("InitializeData") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { // New blocks may have been inserted so the first thing we do is ensure that // the c_unit's number of blocks matches the actual count of basic blocks. DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->InitializeBasicBlockData(); c_unit->mir_graph.get()->SSATransformationStart(); @@ -78,7 +78,7 @@ class CalculatePredecessors : public PassME { CalculatePredecessors() : PassME("CalculatePredecessors") { } - void Start(const PassDataHolder* data) const; + void Start(PassDataHolder* data) const; }; /** @@ -90,9 +90,9 @@ class DFSOrders : public PassME { DFSOrders() : PassME("DFSOrders") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->ComputeDFSOrders(); } @@ -107,17 +107,17 @@ class BuildDomination : public PassME { BuildDomination() : PassME("BuildDomination") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->ComputeDominators(); c_unit->mir_graph.get()->CompilerInitializeSSAConversion(); } - void End(const PassDataHolder* data) const { + void End(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); // Verify the dataflow information after the pass. 
if (c_unit->enable_debug & (1 << kDebugVerifyDataflow)) { @@ -135,9 +135,9 @@ class DefBlockMatrix : public PassME { DefBlockMatrix() : PassME("DefBlockMatrix") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->ComputeDefBlockMatrix(); } @@ -152,9 +152,9 @@ class CreatePhiNodes : public PassME { CreatePhiNodes() : PassME("CreatePhiNodes") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->InsertPhiNodes(); } @@ -170,9 +170,9 @@ class ClearVisitedFlag : public PassME { ClearVisitedFlag() : PassME("ClearVisitedFlag") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->ClearAllVisitedFlags(); } @@ -187,9 +187,9 @@ class SSAConversion : public PassME { SSAConversion() : PassME("SSAConversion") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); MIRGraph *mir_graph = c_unit->mir_graph.get(); mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock()); @@ -226,9 +226,9 @@ class PerformInitRegLocations : public PassME { PerformInitRegLocations() : PassME("PerformInitRegLocation") { } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph->InitRegLocations(); } @@ -254,9 +254,9 @@ class ConstantPropagation : public PassME { return false; } - void Start(const PassDataHolder* data) const { + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph->InitializeConstantPropagation(); } @@ -271,9 +271,9 @@ class FreeData : public PassME { FreeData() : PassME("FreeData") { } - void End(const PassDataHolder* data) const { + void End(PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); c_unit->mir_graph.get()->SSATransformationEnd(); } diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index a895e6ec34..5083bbcc15 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1628,7 +1628,7 @@ void ArmMir2Lir::AssembleLIR() { CreateNativeGcMap(); } -int 
ArmMir2Lir::GetInsnSize(LIR* lir) { +size_t ArmMir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 8db7d4ee73..44998627ca 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -87,7 +87,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. @@ -205,8 +205,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int value); - LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, - RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg()); + LIR* LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base, + int displacement, RegStorage r_src_dest, + RegStorage r_work = RegStorage::InvalidReg()); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 61d3d56036..b236f99311 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -820,15 +820,17 @@ LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStor } // Helper function for LoadBaseDispBody()/StoreBaseDispBody(). -LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, - RegStorage r_src_dest, RegStorage r_work) { +LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base, + int displacement, RegStorage r_src_dest, + RegStorage r_work) { DCHECK_EQ(displacement & 3, 0); - int encoded_disp = (displacement & 1020) >> 2; // Within range of the instruction. + constexpr int kOffsetMask = 0xff << 2; + int encoded_disp = (displacement & kOffsetMask) >> 2; // Within range of the instruction. RegStorage r_ptr = r_base; - if ((displacement & ~1020) != 0) { + if ((displacement & ~kOffsetMask) != 0) { r_ptr = r_work.Valid() ? r_work : AllocTemp(); - // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); + // Add displacement & ~kOffsetMask to base, it's a single instruction for up to +-256KiB. 
+ OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~kOffsetMask); } LIR* lir = nullptr; if (!r_src_dest.IsPair()) { @@ -837,7 +839,7 @@ LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int d lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(), encoded_disp); } - if ((displacement & ~1020) != 0 && !r_work.Valid()) { + if ((displacement & ~kOffsetMask) != 0 && !r_work.Valid()) { FreeTemp(r_ptr); } return lir; @@ -863,11 +865,12 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag case k64: if (r_dest.IsFloat()) { DCHECK(!r_dest.IsPair()); - load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest); + load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrd, r_base, displacement, r_dest); } else { DCHECK(r_dest.IsPair()); // Use the r_dest.GetLow() for the temporary pointer if needed. - load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow()); + load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2LdrdI8, r_base, displacement, r_dest, + r_dest.GetLow()); } already_generated = true; break; @@ -878,7 +881,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag case kReference: if (r_dest.IsFloat()) { DCHECK(r_dest.IsSingle()); - load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest); + load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrs, r_base, displacement, r_dest); already_generated = true; break; } @@ -1001,10 +1004,10 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora case k64: if (r_src.IsFloat()) { DCHECK(!r_src.IsPair()); - store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src); + store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src); } else { DCHECK(r_src.IsPair()); - store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src); + store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2StrdI8, r_base, displacement, r_src); } already_generated = true; break; @@ -1015,7 +1018,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora case kReference: if (r_src.IsFloat()) { DCHECK(r_src.IsSingle()); - store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src); + store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrs, r_base, displacement, r_src); already_generated = true; break; } diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 2c4f26216f..c5bd005abf 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -688,9 +688,10 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { // Fail, if `expected' contains an unsatisfied requirement. if (expected != nullptr) { - // TODO(Arm64): make this FATAL. - LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name - << ". Expected " << expected << ", got 0x" << std::hex << operand; + LOG(WARNING) << "Method: " << PrettyMethod(cu_->method_idx, *cu_->dex_file) + << " @ 0x" << std::hex << lir->dalvik_offset; + LOG(FATAL) << "Bad argument n. " << i << " of " << encoder->name + << ". 
Expected " << expected << ", got 0x" << std::hex << operand; } } @@ -887,7 +888,7 @@ void Arm64Mir2Lir::AssembleLIR() { CreateNativeGcMap(); } -int Arm64Mir2Lir::GetInsnSize(LIR* lir) { +size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) { ArmOpcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); return EncodingMap[opcode].size; diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 59eec3d486..f1748effb2 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -142,7 +142,7 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"? - LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32); + LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32); // Get base branch address. RegStorage branch_reg = AllocTempWide(); diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index bf09b8610e..0fa7f2bfbd 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -20,9 +20,45 @@ #include "arm64_lir.h" #include "dex/compiler_internals.h" +#include <map> + namespace art { class Arm64Mir2Lir : public Mir2Lir { + protected: + // TODO: consolidate 64-bit target support. + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; + virtual ~InToRegStorageMapper() {} + }; + + class InToRegStorageArm64Mapper : public InToRegStorageMapper { + public: + InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} + virtual ~InToRegStorageArm64Mapper() {} + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide); + private: + int cur_core_reg_; + int cur_fp_reg_; + }; + + class InToRegStorageMapping { + public: + InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), + initialized_(false) {} + void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); + int GetMaxMappedIn() { return max_mapped_in_; } + bool IsThereStackMapped() { return is_there_stack_mapped_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + std::map<int, RegStorage> mapping_; + int max_mapped_in_; + bool is_there_stack_mapped_; + bool initialized_; + }; + public: Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -51,7 +87,9 @@ class Arm64Mir2Lir : public Mir2Lir { OpSize size) OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_src, OpSize size) OVERRIDE; - void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg); + void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE; + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, + int offset, int check_value, LIR* target) OVERRIDE; // Required for target - register utilities. 
RegStorage TargetReg(SpecialTargetRegister reg); @@ -86,7 +124,7 @@ class Arm64Mir2Lir : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. @@ -219,12 +257,21 @@ class Arm64Mir2Lir : public Mir2Lir { bool InexpensiveConstantDouble(int64_t value); void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - int LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); + + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + InToRegStorageMapping in_to_reg_storage_mapping_; private: /** @@ -261,8 +308,8 @@ class Arm64Mir2Lir : public Mir2Lir { return ret_val; } - LIR* LoadFPConstantValue(int r_dest, int32_t value); - LIR* LoadFPConstantValueWide(int r_dest, int64_t value); + LIR* LoadFPConstantValue(RegStorage r_dest, int32_t value); + LIR* LoadFPConstantValueWide(RegStorage r_dest, int64_t value); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 265e8d2020..9814cb4a7a 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -45,7 +45,6 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, false); rl_result = GetReturn(kFPReg); @@ -89,7 +88,6 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. { ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod); RegStorage r_tgt = CallHelperSetup(helper_offset); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 8112c2ec6d..2ac4adbadc 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -160,6 +160,19 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_ return branch; } +LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, + RegStorage base_reg, int offset, int check_value, + LIR* target) { + // It is possible that temp register is 64-bit. (ArgReg or RefReg) + // Always compare 32-bit value no matter what temp_reg is. 
+ if (temp_reg.Is64Bit()) { + temp_reg = As32BitReg(temp_reg); + } + Load32Disp(base_reg, offset, temp_reg); + LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target); + return branch; +} + LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { bool dest_is_fp = r_dest.IsFloat(); bool src_is_fp = r_src.IsFloat(); @@ -551,8 +564,11 @@ LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { // Decrement register and branch on condition LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { - // Combine sub & test using sub setflags encoding here - OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. + // Combine sub & test using sub setflags encoding here. We need to make sure a + // subtract form that sets carry is used, so generate explicitly. + // TODO: might be best to add a new op, kOpSubs, and handle it generically. + ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd); + NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1); // For value == 1, this should set flags. DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } @@ -676,9 +692,6 @@ void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, Reg */ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) { - // TODO(Arm64): check this. - UNIMPLEMENTED(WARNING); - RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -720,7 +733,8 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } else { // No special indexed operation, lea + load w/ displacement reg_ptr = AllocTempRef(); - OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg), + EncodeShift(kA64Lsl, scale)); FreeTemp(rl_index.reg); } rl_result = EvalLoc(rl_dest, reg_class, true); @@ -754,7 +768,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } - LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size); + LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size); MarkPossibleNullPointerException(opt_flags); FreeTemp(reg_ptr); StoreValue(rl_dest, rl_result); @@ -767,9 +781,6 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, */ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { - // TODO(Arm64): check this. 
- UNIMPLEMENTED(WARNING); - RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); bool constant_index = rl_index.is_const; @@ -825,7 +836,8 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_src = LoadValue(rl_src, reg_class); } if (!constant_index) { - OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg), + EncodeShift(kA64Lsl, scale)); } if (needs_range_check) { if (constant_index) { @@ -846,7 +858,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } - StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size); + StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size); MarkPossibleNullPointerException(opt_flags); } if (allocated_reg_ptr_temp) { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index e2846aebc3..fba368aa8c 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -105,7 +105,6 @@ RegLocation Arm64Mir2Lir::LocCReturnDouble() { // Return a target-dependent special register. RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { - // TODO(Arm64): this function doesn't work for hard-float ABI. RegStorage res_reg = RegStorage::InvalidReg(); switch (reg) { case kSelf: res_reg = rs_rA64_SELF; break; @@ -117,12 +116,20 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kArg1: res_reg = rs_x1; break; case kArg2: res_reg = rs_x2; break; case kArg3: res_reg = rs_x3; break; + case kArg4: res_reg = rs_x4; break; + case kArg5: res_reg = rs_x5; break; + case kArg6: res_reg = rs_x6; break; + case kArg7: res_reg = rs_x7; break; case kFArg0: res_reg = rs_f0; break; case kFArg1: res_reg = rs_f1; break; case kFArg2: res_reg = rs_f2; break; case kFArg3: res_reg = rs_f3; break; + case kFArg4: res_reg = rs_f4; break; + case kFArg5: res_reg = rs_f5; break; + case kFArg6: res_reg = rs_f6; break; + case kFArg7: res_reg = rs_f7; break; case kRet0: res_reg = rs_x0; break; - case kRet1: res_reg = rs_x0; break; + case kRet1: res_reg = rs_x1; break; case kInvokeTgt: res_reg = rs_rA64_LR; break; case kHiddenArg: res_reg = rs_x12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; @@ -132,10 +139,6 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { return res_reg; } -RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - return RegStorage::InvalidReg(); -} - /* * Decode the register id. This routine makes assumptions on the encoding made by RegStorage. */ @@ -738,18 +741,44 @@ RegLocation Arm64Mir2Lir::GetReturnAlt() { /* To be used when explicitly managing register use */ void Arm64Mir2Lir::LockCallTemps() { + // TODO: needs cleanup. LockTemp(rs_x0); LockTemp(rs_x1); LockTemp(rs_x2); LockTemp(rs_x3); + LockTemp(rs_x4); + LockTemp(rs_x5); + LockTemp(rs_x6); + LockTemp(rs_x7); + LockTemp(rs_f0); + LockTemp(rs_f1); + LockTemp(rs_f2); + LockTemp(rs_f3); + LockTemp(rs_f4); + LockTemp(rs_f5); + LockTemp(rs_f6); + LockTemp(rs_f7); } /* To be used when explicitly managing register use */ void Arm64Mir2Lir::FreeCallTemps() { + // TODO: needs cleanup. 
FreeTemp(rs_x0); FreeTemp(rs_x1); FreeTemp(rs_x2); FreeTemp(rs_x3); + FreeTemp(rs_x4); + FreeTemp(rs_x5); + FreeTemp(rs_x6); + FreeTemp(rs_x7); + FreeTemp(rs_f0); + FreeTemp(rs_f1); + FreeTemp(rs_f2); + FreeTemp(rs_f3); + FreeTemp(rs_f4); + FreeTemp(rs_f5); + FreeTemp(rs_f6); + FreeTemp(rs_f7); } RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) { @@ -786,6 +815,69 @@ const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt; } +RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float, + bool is_wide) { + const RegStorage coreArgMappingToPhysicalReg[] = + {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7}; + const int coreArgMappingToPhysicalRegSize = + sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const RegStorage fpArgMappingToPhysicalReg[] = + {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7}; + const int fpArgMappingToPhysicalRegSize = + sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + + RegStorage result = RegStorage::InvalidReg(); + if (is_double_or_float) { + if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { + result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; + if (result.Valid()) { + // TODO: switching between widths remains a bit ugly. Better way? + int res_reg = result.GetReg(); + result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg); + } + } + } else { + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = coreArgMappingToPhysicalReg[cur_core_reg_++]; + if (result.Valid()) { + // TODO: switching between widths remains a bit ugly. Better way? + int res_reg = result.GetReg(); + result = is_wide ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg); + } + } + } + return result; +} + +RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); +} + +void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, + InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + max_mapped_in_ = -1; + is_there_stack_mapped_ = false; + for (int in_position = 0; in_position < count; in_position++) { + RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide); + if (reg.Valid()) { + mapping_[in_position] = reg; + max_mapped_in_ = std::max(max_mapped_in_, in_position); + if (reg.Is64BitSolo()) { + // We covered 2 args, so skip the next one + in_position++; + } + } else { + is_there_stack_mapped_ = true; + } + } + initialized_ = true; +} + + +// Deprecate. Use the new mechanism. // TODO(Arm64): reuse info in QuickArgumentVisitor? 
static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used, OpSize* op_size) { @@ -805,7 +897,7 @@ static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* nu } } else { int n = *num_gpr_used; - if (n < 7) { + if (n < 8) { *num_gpr_used = n + 1; if (loc->wide) { *op_size = k64; @@ -820,6 +912,18 @@ static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* nu return RegStorage::InvalidReg(); } +RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!in_to_reg_storage_mapping_.IsInitialized()) { + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; + + InToRegStorageArm64Mapper mapper; + in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + + /* * If there are any ins passed in registers that have not been promoted * to a callee-save register, flush them to the frame. Perform initial @@ -888,33 +992,188 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { } } -int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - int last_arg_reg = TargetReg(kArg3).GetReg(); - int next_reg = TargetReg(kArg1).GetReg(); - int next_arg = 0; - if (skip_this) { - next_reg++; - next_arg++; +/* + * Load up to 5 arguments, the first three of which will be in + * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, + * and as part of the load sequence, it must be replaced with + * the target method pointer. + */ +int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, + int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + return GenDalvikArgsRange(info, + call_state, pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, + direct_method, type, skip_this); +} + +/* + * May have 0+ arguments (also used for jumbo). Note that + * source virtual registers may be in physical registers, so may + * need to be flushed to home location before copying. This + * applies to arg3 and above (see below). + * + * FIXME: update comments. + * + * Two general strategies: + * If < 20 arguments + * Pass args 3-18 using vldm/vstm block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * If 20+ arguments + * Pass args arg19+ using memcpy block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * + */ +int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + /* If no arguments, just return */ + if (info->num_arg_words == 0) + return call_state; + + const int start_index = skip_this ? 1 : 0; + + InToRegStorageArm64Mapper mapper; + InToRegStorageMapping in_to_reg_storage_mapping; + in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); + const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); + const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 : + in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 
2 : 1; + int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped); + + // Fisrt of all, check whether it make sense to use bulk copying + // Optimization is aplicable only for range case + // TODO: make a constant instead of 2 + if (info->is_range && regs_left_to_pass_via_stack >= 2) { + // Scan the rest of the args - if in phys_reg flush to memory + for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) { + RegLocation loc = info->args[next_arg]; + if (loc.wide) { + loc = UpdateLocWide(loc); + if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + } + next_arg += 2; + } else { + loc = UpdateLoc(loc); + if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); + } + next_arg++; + } + } + + // Logic below assumes that Method pointer is at offset zero from SP. + DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0); + + // The rest can be copied together + int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, + cu_->instruction_set); + + int current_src_offset = start_offset; + int current_dest_offset = outs_offset; + + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + while (regs_left_to_pass_via_stack > 0) { + /* + * TODO: Improve by adding block copy for large number of arguments. This + * should be done, if possible, as a target-depending helper. For now, just + * copy a Dalvik vreg at a time. + */ + // Moving 32-bits via general purpose register. + size_t bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3); + + // Now load the argument VR and store to the outs. 
+ Load32Disp(TargetReg(kSp), current_src_offset, temp); + Store32Disp(TargetReg(kSp), current_dest_offset, temp); + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + } + DCHECK_EQ(regs_left_to_pass_via_stack, 0); + } + + // Now handle rest not registers if they are + if (in_to_reg_storage_mapping.IsThereStackMapped()) { + RegStorage regSingle = TargetReg(kArg2); + RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg()); + for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (!reg.Valid()) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + } + i++; + } else { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + } + } + } + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + } } - for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) { - RegLocation rl_arg = info->args[next_arg++]; + + // Finish with mapped registers + for (int i = start_index; i <= last_mapped_in; i++) { + RegLocation rl_arg = info->args[i]; rl_arg = UpdateRawLoc(rl_arg); - if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) { - LoadValueDirectWideFixed(rl_arg, RegStorage::Solo64(next_reg)); - next_arg++; - } else { + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { if (rl_arg.wide) { - rl_arg = NarrowRegLoc(rl_arg); - rl_arg.is_const = false; + LoadValueDirectWideFixed(rl_arg, reg); + i++; + } else { + LoadValueDirectFixed(rl_arg, reg); } - LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg)); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + } + + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + if (pcrLabel) { + if (Runtime::Current()->ExplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. 
+ RegStorage tmp = AllocTemp(); + Load32Disp(TargetReg(kArg1), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); } return call_state; } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 71e9e95ab0..2254b8bb02 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -86,14 +86,14 @@ static int32_t EncodeImmDouble(uint64_t bits) { return (bit7 | bit6 | bit5_to_0); } -LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) { - DCHECK(RegStorage::IsSingle(r_dest)); +LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) { + DCHECK(r_dest.IsSingle()); if (value == 0) { - return NewLIR2(kA64Fmov2sw, r_dest, rwzr); + return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr); } else { int32_t encoded_imm = EncodeImmSingle((uint32_t)value); if (encoded_imm >= 0) { - return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm); + return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm); } } @@ -104,19 +104,19 @@ LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) { ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp, - r_dest, 0, 0, 0, 0, data_target); + r_dest.GetReg(), 0, 0, 0, 0, data_target); AppendLIR(load_pc_rel); return load_pc_rel; } -LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) { - DCHECK(RegStorage::IsDouble(r_dest)); +LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) { + DCHECK(r_dest.IsDouble()); if (value == 0) { - return NewLIR2(kA64Fmov2Sx, r_dest, rwzr); + return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr); } else { int32_t encoded_imm = EncodeImmDouble(value); if (encoded_imm >= 0) { - return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm); + return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm); } } @@ -128,20 +128,19 @@ LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } - DCHECK(RegStorage::IsFloat(r_dest)); ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp), - r_dest, 0, 0, 0, 0, data_target); + r_dest.GetReg(), 0, 0, 0, 0, data_target); AppendLIR(load_pc_rel); return load_pc_rel; } static int CountLeadingZeros(bool is_wide, uint64_t value) { - return (is_wide) ? __builtin_clzl(value) : __builtin_clz((uint32_t)value); + return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value); } static int CountTrailingZeros(bool is_wide, uint64_t value) { - return (is_wide) ? __builtin_ctzl(value) : __builtin_ctz((uint32_t)value); + return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value); } static int CountSetBits(bool is_wide, uint64_t value) { @@ -276,12 +275,16 @@ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { LIR* res; if (r_dest.IsFloat()) { - return LoadFPConstantValue(r_dest.GetReg(), value); + return LoadFPConstantValue(r_dest, value); + } + + if (r_dest.Is64Bit()) { + return LoadConstantWide(r_dest, value); } // Loading SP/ZR with an immediate is not supported. 
- DCHECK_NE(r_dest.GetReg(), rwsp); - DCHECK_NE(r_dest.GetReg(), rwzr); + DCHECK(!A64_REG_IS_SP(r_dest.GetReg())); + DCHECK(!A64_REG_IS_ZR(r_dest.GetReg())); // Compute how many movk, movz instructions are needed to load the value. uint16_t high_bits = High16Bits(value); @@ -331,6 +334,98 @@ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { return res; } +// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide(). +LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { + // Maximum number of instructions to use for encoding the immediate. + const int max_num_ops = 2; + + if (r_dest.IsFloat()) { + return LoadFPConstantValueWide(r_dest, value); + } + + DCHECK(r_dest.Is64Bit()); + + // Loading SP/ZR with an immediate is not supported. + DCHECK(!A64_REG_IS_SP(r_dest.GetReg())); + DCHECK(!A64_REG_IS_ZR(r_dest.GetReg())); + + if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) { + // value is either 0 or -1: we can just use xzr. + ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr); + return NewLIR2(opcode, r_dest.GetReg(), rxzr); + } + + // At least one in value's halfwords is not 0x0, nor 0xffff: find out how many. + int num_0000_halfwords = 0; + int num_ffff_halfwords = 0; + uint64_t uvalue = static_cast<uint64_t>(value); + for (int shift = 0; shift < 64; shift += 16) { + uint16_t halfword = static_cast<uint16_t>(uvalue >> shift); + if (halfword == 0) + num_0000_halfwords++; + else if (halfword == UINT16_C(0xffff)) + num_ffff_halfwords++; + } + int num_fast_halfwords = std::max(num_0000_halfwords, num_ffff_halfwords); + + if (num_fast_halfwords < 3) { + // A single movz/movn is not enough. Try the logical immediate route. + int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value); + if (log_imm >= 0) { + return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm); + } + } + + if (num_fast_halfwords >= 4 - max_num_ops) { + // We can encode the number using a movz/movn followed by one or more movk. + ArmOpcode op; + uint16_t background; + LIR* res = nullptr; + + // Decide whether to use a movz or a movn. + if (num_0000_halfwords >= num_ffff_halfwords) { + op = WIDE(kA64Movz3rdM); + background = 0; + } else { + op = WIDE(kA64Movn3rdM); + background = 0xffff; + } + + // Emit the first instruction (movz, movn). + int shift; + for (shift = 0; shift < 4; shift++) { + uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4)); + if (halfword != background) { + res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift); + break; + } + } + + // Emit the movk instructions. + for (shift++; shift < 4; shift++) { + uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4)); + if (halfword != background) { + NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift); + } + } + return res; + } + + // Use the literal pool. 
+ int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); + if (data_target == NULL) { + data_target = AddWideData(&literal_list_, val_lo, val_hi); + } + + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); + LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), + r_dest.GetReg(), 0, 0, 0, 0, data_target); + AppendLIR(res); + return res; +} + LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) { LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */); res->target = target; @@ -738,29 +833,6 @@ LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0); } -LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { - if (r_dest.IsFloat()) { - return LoadFPConstantValueWide(r_dest.GetReg(), value); - } else { - // TODO(Arm64): check whether we can load the immediate with a short form. - // e.g. via movz, movk or via logical immediate. - - // No short form - load from the literal pool. - int32_t val_lo = Low32Bits(value); - int32_t val_hi = High32Bits(value); - LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); - if (data_target == NULL) { - data_target = AddWideData(&literal_list_, val_lo, val_hi); - } - - ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), - r_dest.GetReg(), 0, 0, 0, 0, data_target); - AppendLIR(res); - return res; - } -} - int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) { return ((shift_type & 0x3) << 7) | (amount & 0x1f); } @@ -778,6 +850,11 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LIR* load; int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; + DCHECK(r_base.Is64Bit()); + // TODO: need a cleaner handling of index registers here and throughout. + if (r_index.Is32Bit()) { + r_index = As64BitReg(r_index); + } if (r_dest.IsFloat()) { if (r_dest.IsDouble()) { @@ -846,6 +923,11 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt LIR* store; int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; + DCHECK(r_base.Is64Bit()); + // TODO: need a cleaner handling of index registers here and throughout. + if (r_index.Is32Bit()) { + r_index = As64BitReg(r_index); + } if (r_src.IsFloat()) { if (r_src.IsDouble()) { @@ -968,8 +1050,9 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement); } else { // Use long sequence. - RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, displacement); + // TODO: cleaner support for index/displacement registers? Not a reference, but must match width. + RegStorage r_scratch = AllocTempWide(); + LoadConstantWide(r_scratch, displacement); load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size); FreeTemp(r_scratch); } @@ -1050,8 +1133,8 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement); } else { // Use long sequence. 
- RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, displacement); + RegStorage r_scratch = AllocTempWide(); + LoadConstantWide(r_scratch, displacement); store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); FreeTemp(r_scratch); } diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 6397208790..3f9379c8b3 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -292,10 +292,14 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method); } -bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) { +bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) { ReaderMutexLock mu(Thread::Current(), lock_); auto it = inline_methods_.find(method_index); - return it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0; + bool res = (it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0); + if (res && intrinsic != nullptr) { + *intrinsic = it->second; + } + return res; } bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index c03f89c8fa..70693c2013 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -67,7 +67,7 @@ class DexFileMethodInliner { /** * Check whether a particular method index corresponds to an intrinsic function. */ - bool IsIntrinsic(uint32_t method_index) LOCKS_EXCLUDED(lock_); + bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); /** * Generate code for an intrinsic function invocation. 
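The new Arm64Mir2Lir::LoadConstantWide() above picks among four strategies: reuse xzr for 0 and -1, a bitmask (logical) immediate ORed with xzr, a movz/movn seed plus movk fix-ups, and finally a literal-pool load. The choice hinges on how many of the value's four 16-bit halfwords are already 0x0000 or 0xffff. The sketch below only restates that selection in isolation: the bitmask-immediate encodability check (EncodeLogicalImmediate in the patch) is abstracted into a caller-supplied boolean, and the enum and function names are invented for illustration, not ART APIs.

// Minimal, self-contained sketch of the halfword heuristic; WideImmStrategy and
// PickWideImmStrategy are illustrative names only and do not exist in ART.
#include <algorithm>
#include <cstdint>
#include <cstdio>

enum class WideImmStrategy { kXzrMove, kLogicalImm, kMovzMovk, kMovnMovk, kLiteralPool };

WideImmStrategy PickWideImmStrategy(int64_t value, bool encodable_as_logical_imm) {
  if (value == 0 || value == -1) {
    return WideImmStrategy::kXzrMove;  // mov / mvn from the zero register.
  }
  int num_0000 = 0;
  int num_ffff = 0;
  const uint64_t uvalue = static_cast<uint64_t>(value);
  for (int shift = 0; shift < 64; shift += 16) {
    const uint16_t halfword = static_cast<uint16_t>(uvalue >> shift);
    if (halfword == 0) {
      ++num_0000;
    } else if (halfword == 0xffff) {
      ++num_ffff;
    }
  }
  const int num_fast = std::max(num_0000, num_ffff);
  // A lone movz/movn only works when three halfwords match the background, so
  // below that a single-instruction bitmask immediate is preferred when it exists.
  if (num_fast < 3 && encodable_as_logical_imm) {
    return WideImmStrategy::kLogicalImm;
  }
  // Two instructions (movz/movn plus one movk) need at least 4 - 2 = 2 matching halfwords.
  if (num_fast >= 2) {
    return num_0000 >= num_ffff ? WideImmStrategy::kMovzMovk : WideImmStrategy::kMovnMovk;
  }
  return WideImmStrategy::kLiteralPool;  // Give up and load from the literal pool.
}

int main() {
  // Three 0xffff halfwords: a single movn covers it (the movn/movk path).
  std::printf("%d\n", static_cast<int>(
      PickWideImmStrategy(static_cast<int64_t>(UINT64_C(0xffffffffffff1234)), false)));
  // No fast halfwords and no logical-immediate encoding: literal pool.
  std::printf("%d\n", static_cast<int>(
      PickWideImmStrategy(INT64_C(0x0123456789abcdef), false)));
  return 0;
}

The thresholds mirror the patch's max_num_ops of 2: a movz/movn seed plus one movk can only synthesize the value when at least 4 - 2 = 2 halfwords already equal the chosen background pattern, and when fewer than 3 halfwords match, the one-instruction ORR-with-logical-immediate route is tried first.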
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index f9081cea08..3b99421a6a 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -73,7 +73,7 @@ void Mir2Lir::AddDivZeroCheckSlowPath(LIR* branch) { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowDivZero), true); } else { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero), true); @@ -96,7 +96,7 @@ void Mir2Lir::GenArrayBoundsCheck(RegStorage index, RegStorage length) { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), index_, length_, true); } else { @@ -129,7 +129,7 @@ void Mir2Lir::GenArrayBoundsCheck(int index, RegStorage length) { m2l_->OpRegCopy(m2l_->TargetReg(kArg1), length_); m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_); - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true); } else { @@ -158,7 +158,7 @@ LIR* Mir2Lir::GenNullCheck(RegStorage reg) { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowNullPointer), true); } else { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowNullPointer), true); @@ -385,7 +385,7 @@ static void GenNewArrayImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, */ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenNewArrayImpl<8>(this, cu_, type_idx, rl_dest, rl_src); } else { GenNewArrayImpl<4>(this, cu_, type_idx, rl_dest, rl_src); @@ -414,7 +414,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { int elems = info->num_arg_words; int type_idx = info->index; FlushAllRegs(); /* Everything to home location */ - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenFilledNewArrayCall<8>(this, cu_, elems, type_idx); } else { GenFilledNewArrayCall<4>(this, cu_, elems, type_idx); @@ -457,12 +457,13 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { * critical. */ // This is addressing the stack, which may be out of the 4G area. - RegStorage r_src = cu_->target64 ? AllocTempWide() : AllocTemp(); - RegStorage r_dst = AllocTemp(); - RegStorage r_idx = AllocTemp(); + RegStorage r_src = AllocTempRef(); + RegStorage r_dst = AllocTempRef(); + RegStorage r_idx = AllocTempRef(); // Not really a reference, but match src/dst. 
RegStorage r_val; switch (cu_->instruction_set) { case kThumb2: + case kArm64: r_val = TargetReg(kLr); break; case kX86: @@ -531,7 +532,7 @@ class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { void Compile() { LIR* unresolved_target = GenerateTargetLabel(); uninit_->target = unresolved_target; - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeStaticStorage), storage_index_, true); } else { @@ -640,7 +641,7 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, FreeTemp(r_base); } else { FlushAllRegs(); // Everything to home locations - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenSputCall<8>(this, is_long_or_double, is_object, &field_info, rl_src); } else { GenSputCall<4>(this, is_long_or_double, is_object, &field_info, rl_src); @@ -734,7 +735,7 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, } } else { FlushAllRegs(); // Everything to home locations - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenSgetCall<8>(this, is_long_or_double, is_object, &field_info); } else { GenSgetCall<4>(this, is_long_or_double, is_object, &field_info); @@ -801,7 +802,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, StoreValue(rl_dest, rl_result); } } else { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenIgetCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj); } else { GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj); @@ -861,7 +862,7 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size, MarkGCCard(rl_src.reg, rl_obj.reg); } } else { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenIputCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src); } else { GenIputCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src); @@ -885,7 +886,7 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK); bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenArrayObjPutCall<8>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src); } else { GenArrayObjPutCall<4>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src); @@ -894,14 +895,15 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTemp(); + DCHECK(!cu_->target64 || rl_method.reg.Is64Bit()); + RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { // Call out to helper which resolves type and verifies access. // Resolved type returned in kRet0. 
- if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), type_idx, rl_method.reg, true); } else { @@ -936,7 +938,7 @@ void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { void Compile() { GenerateTargetLabel(); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_, rl_method_.reg, true); } else { @@ -1005,7 +1007,7 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { void Compile() { GenerateTargetLabel(); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pResolveString), r_method_, string_idx_, true); } else { @@ -1094,7 +1096,7 @@ static void GenNewInstanceImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, uint32_ * call Class::NewInstanceFromCode(type_idx, method); */ void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenNewInstanceImpl<8>(this, cu_, type_idx, rl_dest); } else { GenNewInstanceImpl<4>(this, cu_, type_idx, rl_dest); @@ -1103,7 +1105,7 @@ void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) { void Mir2Lir::GenThrow(RegLocation rl_src) { FlushAllRegs(); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), rl_src, true); } else { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException), rl_src, true); @@ -1182,7 +1184,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kArg0 - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), type_idx, true); } else { @@ -1207,7 +1209,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); // Not resolved // Call out to helper, which will return resolved type in kRet0 - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true); } else { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true); @@ -1247,7 +1249,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } } else { if (cu_->instruction_set == kThumb2) { - RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ? + RegStorage r_tgt = cu_->target64 ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) : LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial)); LIR* it = nullptr; @@ -1269,7 +1271,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LoadConstant(rl_result.reg, 1); // assume true branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL); } - RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ? + RegStorage r_tgt = cu_->target64 ? 
LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) : LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial)); OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); // .ne case - arg0 <= class @@ -1332,7 +1334,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kRet0 // InitializeTypeAndVerifyAccess(idx, method) - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), type_idx, TargetReg(kArg1), true); } else { @@ -1368,7 +1370,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ // Call out to helper, which will return resolved type in kArg0 // InitializeTypeFromCode(idx, method) - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_, m2l_->TargetReg(kArg1), true); } else { @@ -1405,7 +1407,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), m2l_->TargetReg(kArg1)); } - if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) { + if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetReg(kArg2), m2l_->TargetReg(kArg1), true); } else { @@ -1520,7 +1522,7 @@ static void GenShiftOpLongCall(Mir2Lir* mir_to_lir, Instruction::Code opcode, Re void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenShiftOpLongCall<8>(this, opcode, rl_src1, rl_shift); } else { GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift); @@ -1653,7 +1655,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, if (!done) { FlushAllRegs(); /* Send everything to home location */ LoadValueDirectFixed(rl_src2, TargetReg(kArg1)); - RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ? + RegStorage r_tgt = cu_->target64 ? CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod)) : CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod)); LoadValueDirectFixed(rl_src1, TargetReg(kArg0)); @@ -1661,7 +1663,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, GenDivZeroCheck(TargetReg(kArg1)); } // NOTE: callout here is not a safepoint. - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), false /* not a safepoint */); } else { CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */); @@ -1924,7 +1926,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re FlushAllRegs(); /* Everything to home location. 
*/ LoadValueDirectFixed(rl_src, TargetReg(kArg0)); Clobber(TargetReg(kArg0)); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0), lit, false); } else { @@ -2104,7 +2106,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { GenArithOpLongImpl<8>(this, cu_, opcode, rl_dest, rl_src1, rl_src2); } else { GenArithOpLongImpl<4>(this, cu_, opcode, rl_dest, rl_src1, rl_src2); @@ -2156,7 +2158,7 @@ class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoSuspendTarget); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pTestSuspend), true); } else { m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pTestSuspend), true); @@ -2215,7 +2217,7 @@ void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) { /* Call out to helper assembly routine that will null check obj and then lock it. */ void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { FlushAllRegs(); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pLockObject), rl_src, true); } else { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pLockObject), rl_src, true); @@ -2225,7 +2227,7 @@ void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { /* Call out to helper assembly routine that will null check obj and then unlock it. 
*/ void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { FlushAllRegs(); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject), rl_src, true); } else { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject), rl_src, true); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index a90a06e1ba..641579f354 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -156,7 +156,7 @@ void Mir2Lir::CallRuntimeHelperRegLocation(ThreadOffset<pointer_size> helper_off LoadValueDirectFixed(arg0, TargetReg(kArg0)); } else { RegStorage r_tmp; - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); @@ -187,7 +187,7 @@ void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_ LoadValueDirectFixed(arg1, TargetReg(kArg1)); } else { RegStorage r_tmp; - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); @@ -309,7 +309,7 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> LoadValueDirectWideFixed(arg1, r_tmp); } else { RegStorage r_tmp; - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); @@ -320,13 +320,13 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> } else { RegStorage r_tmp; if (arg0.fp) { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); } } else { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); @@ -334,7 +334,7 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> } LoadValueDirectWideFixed(arg0, r_tmp); if (arg1.wide == 0) { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); } else { LoadValueDirectFixed(arg1, arg1.fp ? 
TargetReg(kFArg2) : TargetReg(kArg2)); @@ -342,13 +342,13 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> } else { RegStorage r_tmp; if (arg1.fp) { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); } } else { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); @@ -440,7 +440,7 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<pointer_si LoadValueDirectFixed(arg2, TargetReg(kArg2)); } else { RegStorage r_tmp; - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg()); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); @@ -779,7 +779,7 @@ static int NextStaticCallInsnSP(CompilationUnit* cu, CallInfo* info, const MethodReference& target_method, uint32_t unused, uintptr_t unused2, uintptr_t unused3, InvokeType unused4) { - if (Is64BitInstructionSet(cu->instruction_set)) { + if (cu->target64) { ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeStaticTrampolineWithAccessCheck); return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0); } else { @@ -792,7 +792,7 @@ static int NextDirectCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, uint32_t unused, uintptr_t unused2, uintptr_t unused3, InvokeType unused4) { - if (Is64BitInstructionSet(cu->instruction_set)) { + if (cu->target64) { ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeDirectTrampolineWithAccessCheck); return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0); } else { @@ -805,7 +805,7 @@ static int NextSuperCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, uint32_t unused, uintptr_t unused2, uintptr_t unused3, InvokeType unused4) { - if (Is64BitInstructionSet(cu->instruction_set)) { + if (cu->target64) { ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeSuperTrampolineWithAccessCheck); return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0); } else { @@ -818,7 +818,7 @@ static int NextVCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, uint32_t unused, uintptr_t unused2, uintptr_t unused3, InvokeType unused4) { - if (Is64BitInstructionSet(cu->instruction_set)) { + if (cu->target64) { ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeVirtualTrampolineWithAccessCheck); return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0); } else { @@ -832,7 +832,7 @@ static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu, const MethodReference& target_method, uint32_t unused, uintptr_t unused2, uintptr_t unused3, InvokeType unused4) { - if (Is64BitInstructionSet(cu->instruction_set)) { + if (cu->target64) { ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeInterfaceTrampolineWithAccessCheck); return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0); } else { @@ -1188,7 +1188,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, // Generate memcpy OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset); OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset); - if 
(Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetReg(kArg0), TargetReg(kArg1), (info->num_arg_words - 3) * 4, false); } else { @@ -1540,7 +1540,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { RegLocation rl_start = info->args[2]; // 3rd arg only present in III flavor of IndexOf. LoadValueDirectFixed(rl_start, reg_start); } - RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ? + RegStorage r_tgt = cu_->target64 ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pIndexOf)) : LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pIndexOf)); GenExplicitNullCheck(reg_ptr, info->opt_flags); @@ -1581,7 +1581,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { LoadValueDirectFixed(rl_cmp, reg_cmp); RegStorage r_tgt; if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo)); } else { r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo)); @@ -1598,7 +1598,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { OpReg(kOpBlx, r_tgt); } else { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo)); } else { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo)); @@ -1747,7 +1747,8 @@ void Mir2Lir::GenInvoke(CallInfo* info) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); // TODO: Enable instrinsics for x86_64 // Temporary disable intrinsics for x86_64. We will enable them later step by step. - if (cu_->instruction_set != kX86_64) { + // Temporary disable intrinsics for Arm64. We will enable them later step by step. + if ((cu_->instruction_set != kX86_64) && (cu_->instruction_set != kArm64)) { if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) ->GenIntrinsic(this, info)) { return; @@ -1850,7 +1851,7 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { } } else { // TODO: Extract? - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { call_inst = GenInvokeNoInlineCall<8>(this, info->type); } else { call_inst = GenInvokeNoInlineCall<4>(this, info->type); diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc index b26ab579c3..c7e9190ed9 100644 --- a/compiler/dex/quick/mips/assemble_mips.cc +++ b/compiler/dex/quick/mips/assemble_mips.cc @@ -709,7 +709,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(CodeOffset start_addr) { return res; } -int MipsMir2Lir::GetInsnSize(LIR* lir) { +size_t MipsMir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 62a7f2455c..571adaccc1 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -85,7 +85,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. 
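A recurring change in the gen_invoke.cc hunks above is replacing per-ISA checks (kX86_64, Is64BitInstructionSet) with the cached cu_->target64 flag when a runtime-helper argument is wide: a 64-bit target passes a long/double in a single 64-bit core or FP register (RegStorage::Solo64 / FloatSolo64), while a 32-bit target must split it across a register pair. Now that arm64 also takes these paths, testing for x86_64 alone would miss it. The sketch below is only an illustration of that selection, using invented names (WideArgKind, DescribeWideArg) rather than ART's RegStorage API.

// Illustration only; WideArgKind and DescribeWideArg are made-up names that
// mirror the Solo64-versus-register-pair decision shown in the diff above.
#include <cstdio>

enum class WideArgKind { kFpSolo64, kCoreSolo64, kFpPair, kCorePair };

WideArgKind DescribeWideArg(bool target64, bool is_fp) {
  if (target64) {
    // One 64-bit register holds the whole long/double on a 64-bit target.
    return is_fp ? WideArgKind::kFpSolo64 : WideArgKind::kCoreSolo64;
  }
  // 32-bit targets split the value across two adjacent 32-bit registers.
  return is_fp ? WideArgKind::kFpPair : WideArgKind::kCorePair;
}

int main() {
  std::printf("%d\n", static_cast<int>(DescribeWideArg(/*target64=*/true, /*is_fp=*/false)));   // kCoreSolo64
  std::printf("%d\n", static_cast<int>(DescribeWideArg(/*target64=*/false, /*is_fp=*/true)));   // kFpPair
  return 0;
}

Keying the decision on one cached boolean instead of enumerating instruction sets lets the same code path serve both 64-bit backends without further edits.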
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 40205eabd6..1fc416301c 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -66,7 +66,7 @@ void Mir2Lir::LockArg(int in_position, bool wide) { } } -// TODO: needs revisit for 64-bit. +// TODO: simplify when 32-bit targets go hard-float. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -87,10 +87,11 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) offset += sizeof(uint64_t); } - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); if (!reg_arg.Valid()) { - RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); + RegStorage new_reg = + wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32); return new_reg; } else { @@ -159,6 +160,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) return reg_arg; } +// TODO: simpilfy when 32-bit targets go hard float. void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -186,7 +188,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { Load32Disp(TargetReg(kSp), offset, rl_dest.reg); } } else { - if (cu_->instruction_set == kX86_64) { + if (cu_->target64) { RegStorage reg = GetArgMappingToPhysicalReg(in_position); if (reg.Valid()) { OpRegCopy(rl_dest.reg, reg); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ca4d0e48bf..ca65432c58 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -953,8 +953,8 @@ class Mir2Lir : public Backend { bool GenInlinedReverseBytes(CallInfo* info, OpSize size); bool GenInlinedAbsInt(CallInfo* info); virtual bool GenInlinedAbsLong(CallInfo* info); - bool GenInlinedAbsFloat(CallInfo* info); - bool GenInlinedAbsDouble(CallInfo* info); + virtual bool GenInlinedAbsFloat(CallInfo* info); + virtual bool GenInlinedAbsDouble(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); bool GenInlinedDoubleCvt(CallInfo* info); virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based); @@ -1162,7 +1162,7 @@ class Mir2Lir : public Backend { virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; virtual ResourceMask GetPCUseDefEncoding() const = 0; virtual uint64_t GetTargetInstFlags(int opcode) = 0; - virtual int GetInsnSize(LIR* lir) = 0; + virtual size_t GetInsnSize(LIR* lir) = 0; virtual bool IsUnconditionalBranch(LIR* lir) = 0; // Check support for volatile load/store of a given size. diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index cae59c88c1..5bb0ee04d4 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -473,14 +473,14 @@ RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg); } if (!reg.Valid() && (reg_class != kFPReg)) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { reg = FindLiveReg(wide ? 
reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg); } else { reg = FindLiveReg(reg_pool_->core_regs_, s_reg); } } if (reg.Valid()) { - if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) { + if (wide && !reg.IsFloat() && !cu_->target64) { // Only allow reg pairs for core regs on 32-bit targets. RegStorage high_reg = FindLiveReg(reg_pool_->core_regs_, s_reg + 1); if (high_reg.Valid()) { diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index fb6bd9427b..3f54798b7e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -407,10 +407,17 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" }, { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" }, - { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, - { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, - { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" }, - { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" }, + { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, + { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, + { kX86Fld32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M", "[!0r,!1d]" }, + { kX86Fld64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M", "[!0r,!1d]" }, + { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" }, + { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" }, + { kX86Fst32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M", "[!0r,!1d]" }, + { kX86Fst64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M", "[!0r,!1d]" }, + { kX86Fprem, kNullary, NO_OPERAND | USE_FP_STACK, { 0xD9, 0, 0xF8, 0, 0, 0, 0, 0, false }, "Fprem64", "" }, + { kX86Fucompp, kNullary, NO_OPERAND | USE_FP_STACK, { 0xDA, 0, 0xE9, 0, 0, 0, 0, 0, false }, "Fucompp", "" }, + { kX86Fstsw16R, kNullary, NO_OPERAND, { 0x9B, 0xDF, 0xE0, 0, 0, 0, 0, 0, false }, "Fstsw16R", "ax" }, EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" }, @@ -506,9 +513,80 @@ static uint8_t LowRegisterBits(int32_t raw_reg) { return low_reg; } +static bool HasModrm(const X86EncodingMap* entry) { + switch (entry->kind) { + case kNullary: return false; + case kRegOpcode: return false; + default: return true; + } +} + +static bool HasSib(const X86EncodingMap* entry) { + switch (entry->kind) { + case kArray: return true; + case kArrayReg: return true; + case kRegArray: return true; + case 
kArrayImm: return true; + case kRegArrayImm: return true; + case kShiftArrayImm: return true; + case kShiftArrayCl: return true; + case kArrayCond: return true; + case kCall: + switch (entry->opcode) { + case kX86CallA: return true; + default: return false; + } + case kPcRel: return true; + switch (entry->opcode) { + case kX86PcRelLoadRA: return true; + default: return false; + } + default: return false; + } +} + +static bool ModrmIsRegReg(const X86EncodingMap* entry) { + switch (entry->kind) { + // There is no modrm for this kind of instruction, therefore the reg doesn't form part of the + // modrm: + case kNullary: return true; + case kRegOpcode: return true; + case kMovRegImm: return true; + // Regular modrm value of 3 cases, when there is one register the other register holds an + // opcode so the base register is special. + case kReg: return true; + case kRegReg: return true; + case kRegRegStore: return true; + case kRegImm: return true; + case kRegRegImm: return true; + case kRegRegImmStore: return true; + case kShiftRegImm: return true; + case kShiftRegCl: return true; + case kRegCond: return true; + case kRegRegCond: return true; + case kJmp: + switch (entry->opcode) { + case kX86JmpR: return true; + default: return false; + } + case kCall: + switch (entry->opcode) { + case kX86CallR: return true; + default: return false; + } + default: return false; + } +} + size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, - int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, - int32_t displacement) { + int32_t raw_base, int32_t displacement) { + bool has_modrm = HasModrm(entry); + bool has_sib = HasSib(entry); + bool r8_form = entry->skeleton.r8_form; + bool modrm_is_reg_reg = ModrmIsRegReg(entry); + if (has_sib) { + DCHECK(!modrm_is_reg_reg); + } size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -517,15 +595,19 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int } } if (Gen64Bit() || kIsDebugBuild) { - bool registers_need_rex_prefix = - NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base) || - (r8_form && RegStorage::RegNum(raw_reg) > 4) || - (r8_reg_reg_form && RegStorage::RegNum(raw_base) > 4); - if (registers_need_rex_prefix && - entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { - DCHECK(Gen64Bit()) << "Attempt to use " << entry->name << " on a non-byte register " - << RegStorage::RegNum(raw_reg); - ++size; // rex + bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base); + if (r8_form) { + // Do we need an empty REX prefix to normalize byte registers? + registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4); + registers_need_rex_prefix = registers_need_rex_prefix || + (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4)); + } + if (registers_need_rex_prefix) { + DCHECK(Gen64Bit()) << "Attempt to use a 64-bit only addressable register " + << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name; + if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + ++size; // rex + } } } ++size; // opcode @@ -535,89 +617,72 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int ++size; } } - ++size; // modrm - if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum() - || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { - // SP requires a SIB byte. 
- // GS access also needs a SIB byte for absolute adressing in 64-bit mode. - ++size; + if (has_modrm) { + ++size; // modrm } - if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) { - // BP requires an explicit displacement, even when it's 0. - if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { - DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; + if (!modrm_is_reg_reg) { + if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum() + || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { + // SP requires a SIB byte. + // GS access also needs a SIB byte for absolute adressing in 64-bit mode. + ++size; + } + if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) { + // BP requires an explicit displacement, even when it's 0. + if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { + DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; + } + size += IS_SIMM8(displacement) ? 1 : 4; } - size += IS_SIMM8(displacement) ? 1 : 4; } size += entry->skeleton.immediate_bytes; return size; } -int X86Mir2Lir::GetInsnSize(LIR* lir) { +size_t X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; DCHECK_EQ(entry->opcode, lir->opcode) << entry->name; + switch (entry->kind) { case kData: return 4; // 4 bytes of data. case kNop: return lir->operands[0]; // Length of nop is sole operand. case kNullary: - // Substract 1 for modrm which isn't used. - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0) - 1; + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0); case kRegOpcode: // lir operands - 0: reg - // Substract 1 for modrm which isn't used. - DCHECK_EQ(false, entry->skeleton.r8_form); - // Note: RegOpcode form passes reg as REX_R but encodes it as REX_B. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - 1; + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kReg: // lir operands - 0: reg - // Note: Reg form passes reg as REX_R but encodes it as REX_B. 
- return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kMem: // lir operands - 0: base, 1: disp - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], - true, entry->skeleton.r8_form, false, lir->operands[3]); + lir->operands[3]); case kThreadReg: // lir operands - 0: disp, 1: reg - DCHECK_EQ(false, entry->skeleton.r8_form); // Thread displacement size is always 32bit. - return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, false, false, false, - 0x12345678); + return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, 0x12345678); case kRegReg: // lir operands - 0: reg1, 1: reg2 - // Note: RegReg form passes reg2 as index but encodes it using base. - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0); case kRegRegStore: // lir operands - 0: reg2, 1: reg1 - // Note: RegRegStore form passes reg1 as index but encodes it using base. - return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[1], NO_REG, lir->operands[0], 0); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], - false, entry->skeleton.r8_form, false, lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, entry->skeleton.r8_form, false, lir->operands[4]); + lir->operands[4]); case kRegThread: // lir operands - 0: reg, 1: disp // Thread displacement size is always 32bit. 
- DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, - 0x12345678); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0x12345678); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0); // AX opcodes don't require the modrm byte. if (entry->skeleton.ax_opcode == 0) { return size; @@ -626,84 +691,62 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } } case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, false, false, lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], - true, false, false, lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kThreadImm: // lir operands - 0: disp, 1: imm // Thread displacement size is always 32bit. - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); case kRegRegImm: // lir operands - 0: reg1, 1: reg2, 2: imm // Note: RegRegImm form passes reg2 as index but encodes it using base. - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, 0); case kRegRegImmStore: // lir operands - 0: reg2, 1: reg1, 2: imm // Note: RegRegImmStore form passes reg1 as index but encodes it using base. - return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, 0); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], - false, entry->skeleton.r8_form, false, lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, entry->skeleton.r8_form, false, lir->operands[4]); + lir->operands[4]); case kMovRegImm: // lir operands - 0: reg, 1: immediate return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0) - + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0) - (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. 
- return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]) - + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]) - (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], - true, entry->skeleton.r8_form, false, lir->operands[3]) - + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]) - (lir->operands[4] == 1 ? 1 : 0); case kShiftRegCl: // lir operands - 0: reg, 1: cl DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1])); // Note: ShiftRegCl form passes reg as reg but encodes it using base. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2])); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, false, false, lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl - DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4])); return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], - true, false, false, lir->operands[3]); + lir->operands[3]); case kRegCond: // lir operands - 0: reg, 1: cond - // Note: RegCond form passes reg as REX_R but encodes it as REX_B. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kRegRegCond: // lir operands - 0: reg1, 1: reg2, 2: cond - // Note: RegRegCond form passes reg2 as index but encodes it using base. DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, false, false, false, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0); case kRegMemCond: // lir operands - 0: reg, 1: base, 2: disp, 3:cond DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], false, false, false, - lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 @@ -718,7 +761,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { // Thread displacement size is always 32bit. 
- return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); } else { DCHECK(lir->opcode == kX86JmpR); if (NeedsRex(lir->operands[0])) { @@ -732,14 +775,12 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kX86CallT: // lir operands - 0: disp // Thread displacement size is always 32bit. - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); default: break; } @@ -749,7 +790,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table // Force the displacement size to 32bit, it will hold a computed offset later. return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, false, false, 0x12345678); + 0x12345678); } else { DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate @@ -758,7 +799,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], - lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - + lir->operands[0], NO_REG, NO_REG, 0) - // Shorter ax encoding. (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); case kUnimplemented: @@ -802,8 +843,7 @@ void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form, bool modrm_is_reg_reg) { + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) { // REX.WRXB // W - 64-bit operand // R - MODRM.reg @@ -813,6 +853,9 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, bool r = NeedsRex(raw_reg_r); bool x = NeedsRex(raw_reg_x); bool b = NeedsRex(raw_reg_b); + bool r8_form = entry->skeleton.r8_form; + bool modrm_is_reg_reg = ModrmIsRegReg(entry); + uint8_t rex = 0; if (r8_form) { // Do we need an empty REX prefix to normalize byte register addressing? 
@@ -881,9 +924,8 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form, bool modrm_is_reg_reg) { - EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form, modrm_is_reg_reg); + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) { + EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b); EmitOpcode(entry); } @@ -977,7 +1019,7 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -985,7 +1027,7 @@ void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) { void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); @@ -998,7 +1040,7 @@ void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) { void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); @@ -1008,7 +1050,7 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) { void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1022,7 +1064,7 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_ void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false, false); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); @@ -1033,7 +1075,7 @@ void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int3 void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form, false); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(low_reg, low_base, disp); @@ -1051,7 +1093,7 @@ void X86Mir2Lir::EmitRegMem(const 
X86EncodingMap* entry, int32_t raw_reg, int32_ void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t raw_index, int scale, int32_t disp) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form, false); + EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); @@ -1070,7 +1112,7 @@ void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -1081,7 +1123,7 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false, false); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); @@ -1092,7 +1134,7 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false, false); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG); uint8_t low_reg = LowRegisterBits(raw_reg); EmitModrmThread(low_reg); code_buffer_.push_back(disp & 0xFF); @@ -1107,7 +1149,7 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) { CheckValidByteRegister(entry, raw_reg1); CheckValidByteRegister(entry, raw_reg2); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form, false); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); uint8_t low_reg1 = LowRegisterBits(raw_reg1); uint8_t low_reg2 = LowRegisterBits(raw_reg2); uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; @@ -1120,7 +1162,7 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32 void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false, true); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); uint8_t low_reg1 = LowRegisterBits(raw_reg1); uint8_t low_reg2 = LowRegisterBits(raw_reg2); uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; @@ -1134,7 +1176,7 @@ void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) { DCHECK(!RegStorage::IsFloat(raw_reg)); CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form, false); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base); uint8_t low_reg = 
LowRegisterBits(raw_reg); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(low_reg, low_base, disp); @@ -1151,7 +1193,7 @@ void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { @@ -1165,7 +1207,7 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1177,7 +1219,7 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false, true); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); uint8_t low_reg = LowRegisterBits(raw_reg); code_buffer_.push_back(0xB8 + low_reg); switch (entry->skeleton.immediate_bytes) { @@ -1205,7 +1247,7 @@ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1228,7 +1270,7 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, i void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) { CheckValidByteRegister(entry, raw_reg); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1244,7 +1286,7 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t displacement, int32_t raw_cl) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1258,7 +1300,7 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1279,7 +1321,7 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t 
raw_base, void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); @@ -1322,7 +1364,7 @@ void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, i int32_t cc) { // Generate prefix and opcode without the condition. DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false, true); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_GE(cc, 0); @@ -1348,7 +1390,7 @@ void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, i int32_t disp, int32_t cc) { // Generate prefix and opcode without the condition. DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false, false); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_GE(cc, 0); @@ -1383,7 +1425,7 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) { } else { DCHECK(entry->opcode == kX86JmpR); DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, rel, false, true); + EmitPrefix(entry, NO_REG, NO_REG, rel); code_buffer_.push_back(entry->skeleton.opcode); uint8_t low_reg = LowRegisterBits(rel); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; @@ -1411,7 +1453,7 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) { void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -1420,7 +1462,7 @@ void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int3 void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); DCHECK_EQ(4, entry->skeleton.immediate_bytes); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1432,7 +1474,7 @@ void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) { void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1457,7 +1499,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t } if (entry->opcode == kX86PcRelLoadRA) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false, false); + EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table); 
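Several of the Emit helpers above build a register-direct ModRM byte by hand as (3 << 6) | (reg << 3) | rm. For reference, a hedged sketch of that encoding with hypothetical names rather than the emitter's own:

#include <cstdint>

// mod = 0b11 selects the register-direct form; reg_or_opcode is either the
// second register operand or an opcode extension (for example the /4, /5, /7
// shift groups used by the Shift* emitters), and rm holds the low 3 bits of
// the base/first register (the upper bit travels in REX.B).
uint8_t RegDirectModrm(uint8_t reg_or_opcode, uint8_t rm) {
  return static_cast<uint8_t>((3u << 6) | ((reg_or_opcode & 7u) << 3) | (rm & 7u));
}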
code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1486,7 +1528,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false, false); + EmitPrefix(entry, raw_reg, NO_REG, NO_REG); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); @@ -1503,7 +1545,7 @@ void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " << BuildInsnString(entry->fmt, lir, 0); - for (int i = 0; i < GetInsnSize(lir); ++i) { + for (size_t i = 0; i < GetInsnSize(lir); ++i) { code_buffer_.push_back(0xCC); // push breakpoint instruction - int 3 } } @@ -1800,8 +1842,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitUnimplemented(entry, lir); break; } - CHECK_EQ(static_cast<size_t>(GetInsnSize(lir)), - code_buffer_.size() - starting_cbuf_size) + DCHECK_EQ(lir->flags.size, GetInsnSize(lir)); + CHECK_EQ(lir->flags.size, code_buffer_.size() - starting_cbuf_size) << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name; } return res; diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index f5fce34f2b..dd5dab290d 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -175,7 +175,7 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { } NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec)); NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0, rs_rX86_ARG1, true); } else { @@ -185,7 +185,7 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { } void X86Mir2Lir::GenMoveException(RegLocation rl_dest) { - int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ? + int ex_offset = cu_->target64 ? Thread::ExceptionOffset<8>().Int32Value() : Thread::ExceptionOffset<4>().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); @@ -201,7 +201,7 @@ void X86Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { RegStorage reg_card_base = AllocTemp(); RegStorage reg_card_no = AllocTemp(); LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); - int ct_offset = Is64BitInstructionSet(cu_->instruction_set) ? + int ct_offset = cu_->target64 ? Thread::CardTableOffset<8>().Int32Value() : Thread::CardTableOffset<4>().Int32Value(); if (Gen64Bit()) { @@ -255,7 +255,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->OpRegImm(kOpAdd, rs_rX86_SP, sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. 
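The MarkGCCard hunk above only changes how the thread-local card-table offset is selected, but the write barrier it feeds is easy to state. A portable model, with the card size and dirty value left as parameters because this diff does not show them (assumptions for illustration, not from the patch):

#include <cstdint>
#include <cstddef>

// Dirty the card covering target_address so the GC rescans that region.
// ART's generated code stores a byte derived from the card-table base
// register; here the stored value is just a parameter.
void MarkCard(uint8_t* card_table_base, uintptr_t target_address,
              size_t card_shift, uint8_t dirty_value) {
  card_table_base[target_address >> card_shift] = dirty_value;
}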
- if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow), false /* MarkSafepointPC */, false /* UseLink */); } else { @@ -276,7 +276,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // in case a signal comes in that's not using an alternate signal stack and the large frame may // have moved us outside of the reserved area at the end of the stack. // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>()); } else { OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>()); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index a92608fadc..d482e58521 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -121,7 +121,7 @@ class X86Mir2Lir : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. @@ -148,12 +148,15 @@ class X86Mir2Lir : public Mir2Lir { RegLocation rl_src2); void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double); void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE; + bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE; bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNotLong(RegLocation rl_dest, RegLocation rl_src); @@ -392,16 +395,13 @@ class X86Mir2Lir : public Mir2Lir { protected: size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, - int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, - int32_t displacement); + int32_t raw_base, int32_t displacement); void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg); void EmitPrefix(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form_r, bool modrm_is_reg_reg); + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b); void EmitOpcode(const X86EncodingMap* entry); void EmitPrefixAndOpcode(const X86EncodingMap* entry, - int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form, - bool modrm_is_reg_reg); + int32_t reg_r, int32_t reg_x, int32_t reg_b); void EmitDisp(uint8_t base, int32_t disp); void EmitModrmThread(uint8_t reg_or_opcode); void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp); @@ -798,6 +798,14 @@ class X86Mir2Lir : public Mir2Lir { */ void AnalyzeDoubleUse(RegLocation rl_use); + /* + * @brief Analyze one invoke-static MIR instruction + * @param opcode MIR instruction opcode. + * @param bb Basic block containing instruction. + * @param mir Instruction to analyze. 
+ */ + void AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir); + bool Gen64Bit() const { return gen64bit_; } // Information derived from analysis of MIR diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index f6f06170bb..20bb7bf117 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -48,16 +48,7 @@ void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, break; case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: - FlushAllRegs(); // Send everything to home location - if (Is64BitInstructionSet(cu_->instruction_set)) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, - false); - } - rl_result = GetReturn(kFPReg); - StoreValue(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */); return; case Instruction::NEG_FLOAT: GenNegFloat(rl_dest, rl_src1); @@ -110,16 +101,7 @@ void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, break; case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: - FlushAllRegs(); // Send everything to home location - if (Is64BitInstructionSet(cu_->instruction_set)) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, - false); - } - rl_result = GetReturnWide(kFPReg); - StoreValueWide(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */); return; case Instruction::NEG_DOUBLE: GenNegDouble(rl_dest, rl_src1); @@ -356,6 +338,110 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, } } +void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) { + // Compute offsets to the source and destination VRs on stack. + int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low); + int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low); + int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low); + + // Update the in-register state of sources. + rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1); + rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2); + + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + + // If the source is in physical register, then put it in its location on stack. + if (rl_src1.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src1.reg); + + if (reg_info != nullptr && reg_info->IsTemp()) { + // Calling FlushSpecificReg because it will only write back VR if it is dirty. + FlushSpecificReg(reg_info); + // ResetDef to prevent NullifyRange from removing stores. + ResetDef(rl_src1.reg); + } else { + // It must have been register promoted if it is not a temp but is still in physical + // register. Since we need it to be in memory to convert, we place it there now. + StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32); + } + } + + if (rl_src2.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src2.reg); + if (reg_info != nullptr && reg_info->IsTemp()) { + FlushSpecificReg(reg_info); + ResetDef(rl_src2.reg); + } else { + StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32); + } + } + + int fld_opcode = is_double ? 
kX86Fld64M : kX86Fld32M; + + // Push the source virtual registers onto the x87 stack. + LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src2_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src1_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + FlushReg(rs_rAX); + Clobber(rs_rAX); + LockTemp(rs_rAX); + + LIR* retry = NewLIR0(kPseudoTargetLabel); + + // Divide ST(0) by ST(1) and place result to ST(0). + NewLIR0(kX86Fprem); + + // Move FPU status word to AX. + NewLIR0(kX86Fstsw16R); + + // Check if reduction is complete. + OpRegImm(kOpAnd, rs_rAX, 0x400); + + // If no then continue to compute remainder. + LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + branch->target = retry; + + FreeTemp(rs_rAX); + + // Now store result in the destination VR's stack location. + int displacement = dest_v_reg_offset + LOWORD_OFFSET; + int opcode = is_double ? kX86Fst64M : kX86Fst32M; + LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement); + AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */); + + // Pop ST(1) and ST(0). + NewLIR0(kX86Fucompp); + + /* + * The result is in a physical register if it was in a temp or was register + * promoted. For that reason it is enough to check if it is in physical + * register. If it is, then we must do all of the bookkeeping necessary to + * invalidate temp (if needed) and load in promoted register (if needed). + * If the result's location is in memory, then we do not need to do anything + * more since the fstp has already placed the correct value in memory. + */ + RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) : + UpdateLocTyped(rl_dest, kFPReg); + if (rl_result.location == kLocPhysReg) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + if (is_double) { + LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); + StoreFinalValueWide(rl_dest, rl_result); + } else { + Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); + StoreFinalValue(rl_dest, rl_result); + } + } +} + void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT); @@ -501,6 +587,107 @@ bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) { return true; } +bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) { + // Get the argument + RegLocation rl_src = info->args[0]; + + // Get the inlined intrinsic target virtual register + RegLocation rl_dest = InlineTarget(info); + + // Get the virtual register number + DCHECK_NE(rl_src.s_reg_low, INVALID_SREG); + if (rl_dest.s_reg_low == INVALID_SREG) { + // Result is unused, the code is dead. Inlining successful, no code generated. 
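The retry loop above relies on the x87 fprem contract: each iteration performs a partial reduction, and bit C2 of the FPU status word (mask 0x400, the value tested with kOpAnd above) stays set until the remainder is final. A host-side sketch of that check and of the value fprem converges to (illustrative, not compiler code):

#include <cmath>
#include <cstdint>

constexpr uint16_t kX87StatusC2 = 0x0400;  // "reduction incomplete" flag

bool ReductionIncomplete(uint16_t fpu_status_word) {
  return (fpu_status_word & kX87StatusC2) != 0;  // branch back to fprem while set
}

// Once C2 clears, st(0) holds what fmod would produce: the remainder keeps
// the dividend's sign and the implied quotient is truncated toward zero.
double FpremResult(double dividend, double divisor) {
  return std::fmod(dividend, divisor);
}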
+ return true; + } + int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low); + int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); + + // if argument is the same as inlined intrinsic target + if (v_src_reg == v_dst_reg) { + rl_src = UpdateLoc(rl_src); + + // if argument is in the physical register + if (rl_src.location == kLocPhysReg) { + rl_src = LoadValue(rl_src, kCoreReg); + OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff); + StoreValue(rl_dest, rl_src); + return true; + } + // the argument is in memory + DCHECK((rl_src.location == kLocDalvikFrame) || + (rl_src.location == kLocCompilerTemp)); + + // Operate directly into memory. + int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement, 0x7fffffff); + AnnotateDalvikRegAccess(lir, displacement >> 2, false /*is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit*/); + return true; + } else { + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff); + StoreValue(rl_dest, rl_result); + return true; + } +} + +bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); + DCHECK_NE(rl_src.s_reg_low, INVALID_SREG); + if (rl_dest.s_reg_low == INVALID_SREG) { + // Result is unused, the code is dead. Inlining successful, no code generated. + return true; + } + int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low); + int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); + rl_src = UpdateLocWide(rl_src); + + // if argument is in the physical XMM register + if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) { + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + if (rl_result.reg != rl_src.reg) { + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + } else { + RegStorage sign_mask = AllocTempDouble(); + LoadConstantWide(sign_mask, 0x7fffffffffffffff); + NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg()); + FreeTemp(sign_mask); + } + StoreValueWide(rl_dest, rl_result); + return true; + } else if (v_src_reg == v_dst_reg) { + // if argument is the same as inlined intrinsic target + // if argument is in the physical register + if (rl_src.location == kLocPhysReg) { + rl_src = LoadValueWide(rl_src, kCoreReg); + OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff); + StoreValueWide(rl_dest, rl_src); + return true; + } + // the argument is in memory + DCHECK((rl_src.location == kLocDalvikFrame) || + (rl_src.location == kLocCompilerTemp)); + // Operate directly into memory. 
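The float path above is the usual sign-bit trick: Math.abs(float) becomes an AND of the raw bits with 0x7fffffff, whether the value sits in a register or is rewritten in place in its Dalvik frame slot. A self-contained illustration (not part of the patch):

#include <cstdint>
#include <cstring>

float AbsViaSignMask(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // reinterpret, no numeric conversion
  bits &= 0x7fffffffu;                       // clear the IEEE-754 sign bit
  std::memcpy(&value, &bits, sizeof(value));
  return value;                              // handles -0.0f; NaN payloads are preserved
}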
+ int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement + HIWORD_OFFSET, 0x7fffffff); + AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit*/); + AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /*is_load */, true /* is_64bit */); + return true; + } else { + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegCopyWide(rl_result.reg, rl_src.reg); + OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); + StoreValueWide(rl_dest, rl_result); + return true; + } +} } // namespace art diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 05b5e4354d..b3428133e0 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -991,7 +991,7 @@ void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, } // Load array length to kArg1. m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), new_index, m2l_->TargetReg(kArg1), true); } else { @@ -1031,7 +1031,7 @@ void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, // Load array length to kArg1. m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_); m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true); } else { @@ -1054,7 +1054,7 @@ void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, // Test suspend flag, return target of taken suspend branch LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0); } else { OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0); @@ -1810,7 +1810,7 @@ RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); LoadConstant(rl_result.reg.GetLow(), 0); } else { - OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount); @@ -1829,7 +1829,7 @@ RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); } else { - OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); @@ -1846,7 +1846,7 @@ RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); LoadConstant(rl_result.reg.GetHigh(), 0); } else { - OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), 
shift_amount); @@ -2311,7 +2311,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // Caller function returns Class* in kArg0. - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), type_idx, true); } else { @@ -2337,7 +2337,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k // Need to test presence of type in dex cache at runtime. LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0. - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true); } else { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true); @@ -2352,6 +2352,11 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ RegLocation rl_result = GetReturn(kRefReg); + // On x86-64 kArg0 is not EAX, so we have to copy ref from kArg0 to EAX. + if (Gen64Bit()) { + OpRegCopy(rl_result.reg, TargetReg(kArg0)); + } + // For 32-bit, SETcc only works with EAX..EDX. DCHECK_LT(rl_result.reg.GetRegNum(), 4); @@ -2375,7 +2380,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL); } OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); - if (Is64BitInstructionSet(cu_->instruction_set)) { + if (cu_->target64) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)); } else { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial)); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 483d8cf257..92753e43a4 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -441,6 +441,31 @@ void X86Mir2Lir::ClobberCallerSave() { Clobber(rs_rCX); Clobber(rs_rDX); Clobber(rs_rBX); + + Clobber(rs_fr0); + Clobber(rs_fr1); + Clobber(rs_fr2); + Clobber(rs_fr3); + Clobber(rs_fr4); + Clobber(rs_fr5); + Clobber(rs_fr6); + Clobber(rs_fr7); + + if (Gen64Bit()) { + Clobber(rs_r8); + Clobber(rs_r9); + Clobber(rs_r10); + Clobber(rs_r11); + + Clobber(rs_fr8); + Clobber(rs_fr9); + Clobber(rs_fr10); + Clobber(rs_fr11); + Clobber(rs_fr12); + Clobber(rs_fr13); + Clobber(rs_fr14); + Clobber(rs_fr15); + } } RegLocation X86Mir2Lir::GetReturnWideAlt() { diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index b93e3e8833..46e877f8f9 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -18,6 +18,8 @@ #include "dex/quick/mir_to_lir-inl.h" #include "dex/dataflow_iterator-inl.h" #include "x86_lir.h" +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" namespace art { @@ -953,6 +955,9 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir) { case Instruction::PACKED_SWITCH: store_method_addr_ = true; break; + case Instruction::INVOKE_STATIC: + AnalyzeInvokeStatic(opcode, bb, mir); + break; default: // Other instructions are not interesting yet. 
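The GenShiftImmOpLong hunks above (including the OpRegCopy change that copies the low and high words explicitly) build a 64-bit shift out of 32-bit registers; for shift amounts below 32 the high word is produced with SHLD/SHRD. A portable model of the left-shift case (sketch only, not the generated code):

#include <cstdint>

// shift must be in [1, 31]; the >= 32 case is handled by the separate path
// above (move the low word into the high register, then shift it alone).
uint64_t ShlLong(uint32_t lo, uint32_t hi, int shift) {
  uint32_t new_hi = (hi << shift) | (lo >> (32 - shift));  // kX86Shld32RRI
  uint32_t new_lo = lo << shift;                           // kX86Sal32RI
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}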
break; @@ -1020,4 +1025,22 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) { DCHECK(CheckCorePoolSanity()); return loc; } + +void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir) { + uint32_t index = mir->dalvikInsn.vB; + if (!(mir->optimization_flags & MIR_INLINED)) { + DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); + InlineMethod method; + if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) + ->IsIntrinsic(index, &method)) { + switch (method.opcode) { + case kIntrinsicAbsDouble: + store_method_addr_ = true; + break; + default: + break; + } + } + } +} } // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index f1b5811a33..28b9dca193 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -572,8 +572,15 @@ enum X86OpCode { kX86PsllqRI, // left shift of floating point registers 64 bits x 2 kX86Fild32M, // push 32-bit integer on x87 stack kX86Fild64M, // push 64-bit integer on x87 stack + kX86Fld32M, // push float on x87 stack + kX86Fld64M, // push double on x87 stack kX86Fstp32M, // pop top x87 fp stack and do 32-bit store kX86Fstp64M, // pop top x87 fp stack and do 64-bit store + kX86Fst32M, // do 32-bit store + kX86Fst64M, // do 64-bit store + kX86Fprem, // remainder from dividing of two floating point values + kX86Fucompp, // compare floating point values and pop x87 fp stack twice + kX86Fstsw16R, // store FPU status word Binary0fOpCode(kX86Mova128), // move 128 bits aligned kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128 Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1 diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 43243254f1..e26745ad5e 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -117,6 +117,16 @@ void MIRGraph::ComputeDFSOrders() { RecordDFSOrders(GetEntryBlock()); num_reachable_blocks_ = dfs_order_->Size(); + + if (num_reachable_blocks_ != num_blocks_) { + // Hide all unreachable blocks. + AllNodesIterator iter(this); + for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { + if (!bb->visited) { + bb->Hide(cu_); + } + } + } } /* diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index c4af9cb55c..db383c4d0b 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -123,6 +123,16 @@ bool MIRGraph::SetHigh(int index) { */ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { SSARepresentation *ssa_rep = mir->ssa_rep; + + /* + * The dex bytecode definition does not explicitly outlaw the definition of the same + * virtual register to be used in both a 32-bit and 64-bit pair context. However, dx + * does not generate this pattern (at least recently). Further, in the next revision of + * dex, we will forbid this. To support the few cases in the wild, detect this pattern + * and punt to the interpreter. 
+ */ + bool type_mismatch = false; + if (ssa_rep) { uint64_t attrs = GetDataFlowAttributes(mir); const int* uses = ssa_rep->uses; @@ -145,6 +155,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { } } + // Handles uses int next = 0; if (attrs & DF_UA) { @@ -162,6 +173,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { SRegToVReg(uses[next + 1])); next += 2; } else { + type_mismatch |= reg_location_[uses[next]].wide; next++; } } @@ -180,6 +192,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { SRegToVReg(uses[next + 1])); next += 2; } else { + type_mismatch |= reg_location_[uses[next]].wide; next++; } } @@ -196,6 +209,8 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { reg_location_[uses[next + 1]].high_word = true; DCHECK_EQ(SRegToVReg(uses[next])+1, SRegToVReg(uses[next + 1])); + } else { + type_mismatch |= reg_location_[uses[next]].wide; } } @@ -205,6 +220,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) { switch (cu_->shorty[0]) { case 'I': + type_mismatch |= reg_location_[uses[0]].wide; changed |= SetCore(uses[0]); break; case 'J': @@ -215,6 +231,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { reg_location_[uses[1]].high_word = true; break; case 'F': + type_mismatch |= reg_location_[uses[0]].wide; changed |= SetFp(uses[0]); break; case 'D': @@ -225,6 +242,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { reg_location_[uses[1]].high_word = true; break; case 'L': + type_mismatch |= reg_location_[uses[0]].wide; changed |= SetRef(uses[0]); break; default: break; @@ -261,6 +279,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) { reg_location_[uses[next]].defined = true; reg_location_[uses[next]].ref = true; + type_mismatch |= reg_location_[uses[next]].wide; next++; } uint32_t cpos = 1; @@ -286,12 +305,15 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { i++; break; case 'F': + type_mismatch |= reg_location_[uses[i]].wide; ssa_rep->fp_use[i] = true; break; case 'L': + type_mismatch |= reg_location_[uses[i]].wide; changed |= SetRef(uses[i]); break; default: + type_mismatch |= reg_location_[uses[i]].wide; changed |= SetCore(uses[i]); break; } @@ -367,6 +389,12 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { } } } + if (type_mismatch) { + LOG(WARNING) << "Deprecated dex type mismatch, interpreting " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + LOG(INFO) << "@ 0x" << std::hex << mir->offset; + SetPuntToInterpreter(true); + } return changed; } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 16c1e00c83..3e326f0633 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1918,7 +1918,7 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t } } uint64_t duration_ns = NanoTime() - start_ns; - if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning())) { + if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning()) && !kIsDebugBuild) { LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file) << " took " << PrettyDuration(duration_ns); } diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 65bc3185eb..c6b9161b63 100644 --- 
a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -535,7 +535,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { NullHandle<mirror::ClassLoader>(), NullHandle<mirror::ArtMethod>(), invoke_type); - CHECK(method != NULL); + CHECK(method != NULL) << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking. method->SetQuickOatCodeOffset(offsets.code_offset_); method->SetOatNativeGcMapOffset(offsets.gc_map_offset_); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 342a191a47..f4b12e2d38 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -889,7 +889,6 @@ void ParallelMoveResolverX86::Exchange(Register reg, int mem) { __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); } - void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { ScratchRegisterScope ensure_scratch1( this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ef17ca73df..ebeef9dfc1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -35,6 +35,9 @@ x86_64::X86_64ManagedRegister Location::AsX86_64() const { namespace x86_64 { +// Some x86_64 instructions require a register to be available as temp. +static constexpr Register TMP = R11; + static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; @@ -53,7 +56,8 @@ static Location X86_64CpuLocation(Register reg) { CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) : CodeGenerator(graph, kNumberOfRegIds), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) : HGraphVisitor(graph), @@ -89,6 +93,9 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool* blocked_registers) const { // Stack register is always reserved. blocked_registers[RSP] = true; + // Block the register used as TMP. + blocked_registers[TMP] = true; + // TODO: We currently don't use Quick's callee saved registers. 
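The reserved TMP = R11 register above, blocked from allocation, exists because x86-64 has no memory-to-memory mov: a stack-slot-to-stack-slot move must bounce through a register, and borrowing RAX for that (as the code previously did) risks clobbering a value the allocator placed there. A minimal model of the bounce, with a plain array standing in for [RSP + offset]:

#include <cstdint>
#include <cstddef>
#include <vector>

// The local `tmp` plays the role of the reserved, never-allocated R11.
void MoveStackSlot(std::vector<int32_t>& stack_slots, size_t dst, size_t src) {
  int32_t tmp = stack_slots[src];  // movl TMP, [RSP + src]
  stack_slots[dst] = tmp;          // movl [RSP + dst], TMP
}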
blocked_registers[RBX] = true; blocked_registers[RBP] = true; @@ -192,8 +199,8 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); } else { DCHECK(source.IsStackSlot()); - __ movl(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex())); - __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX)); + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else { DCHECK(destination.IsDoubleStackSlot()); @@ -201,8 +208,8 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); } else { DCHECK(source.IsDoubleStackSlot()); - __ movq(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex())); - __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX)); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } } @@ -211,7 +218,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HIn if (instruction->AsIntConstant() != nullptr) { Immediate imm(instruction->AsIntConstant()->GetValue()); if (location.IsRegister()) { - __ movq(location.AsX86_64().AsCpuRegister(), imm); + __ movl(location.AsX86_64().AsCpuRegister(), imm); } else { __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm); } @@ -220,8 +227,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HIn if (location.IsRegister()) { __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value)); } else { - __ movq(CpuRegister(RAX), Immediate(value)); - __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(RAX)); + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); } } else if (instruction->AsLoadLocal() != nullptr) { switch (instruction->GetType()) { @@ -288,7 +295,7 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - locations->SetInAt(0, X86_64CpuLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); if_instr->SetLocations(locations); } @@ -344,9 +351,9 @@ void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) { void LocationsBuilderX86_64::VisitEqual(HEqual* equal) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); - locations->SetInAt(0, X86_64CpuLocation(RAX)); - locations->SetInAt(1, X86_64CpuLocation(RCX)); - locations->SetOut(X86_64CpuLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); equal->SetLocations(locations); } @@ -364,7 +371,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { - // Will be generated at use site. 
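The movq to movl change for HIntConstant above is not cosmetic: movq with a 32-bit immediate sign-extends it into the full 64-bit register, while movl writes only the low 32 bits (and zeroes the upper half), matching the width of the constant. A quick host-side illustration with assumed helper names (not codegen code):

#include <cstdint>

uint64_t AsMovqImm32(int32_t imm) {   // movq reg, imm32: sign-extended to 64 bits
  return static_cast<uint64_t>(static_cast<int64_t>(imm));
}

uint64_t AsMovlImm32(int32_t imm) {   // movl reg32, imm32: upper half zeroed
  return static_cast<uint32_t>(imm);
}
// For imm = -1: AsMovqImm32 -> 0xFFFFFFFFFFFFFFFF, AsMovlImm32 -> 0x00000000FFFFFFFF.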
+ codegen_->Move(constant, constant->GetLocations()->Out(), nullptr); } void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { @@ -545,9 +552,9 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - locations->SetInAt(0, X86_64CpuLocation(RAX)); - locations->SetInAt(1, X86_64CpuLocation(RCX)); - locations->SetOut(X86_64CpuLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); break; } @@ -566,11 +573,15 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); switch (add->GetResultType()) { - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } case Primitive::kPrimLong: { - DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), - locations->Out().AsX86_64().AsCpuRegister().AsRegister()); __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(), locations->InAt(1).AsX86_64().AsCpuRegister()); break; @@ -593,9 +604,9 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { switch (sub->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - locations->SetInAt(0, X86_64CpuLocation(RAX)); - locations->SetInAt(1, X86_64CpuLocation(RCX)); - locations->SetOut(X86_64CpuLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); break; } @@ -614,11 +625,15 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { LocationSummary* locations = sub->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); switch (sub->GetResultType()) { - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } case Primitive::kPrimLong: { - DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), - locations->Out().AsX86_64().AsCpuRegister().AsRegister()); __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(), locations->InAt(1).AsX86_64().AsCpuRegister()); break; @@ -671,8 +686,8 @@ void InstructionCodeGeneratorX86_64::VisitParameterValue(HParameterValue* instru void LocationsBuilderX86_64::VisitNot(HNot* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, X86_64CpuLocation(RAX)); - locations->SetOut(X86_64CpuLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); instruction->SetLocations(locations); } @@ -701,7 +716,85 @@ void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) { } void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { + return 
codegen_->GetAssembler(); +} + +void ParallelMoveResolverX86_64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(destination.IsStackSlot()); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsX86_64().AsCpuRegister()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86_64().AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsStackSlot()); + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) { + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg)); + __ movl(CpuRegister(reg), CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movl(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86_64::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + + +void ParallelMoveResolverX86_64::SpillScratch(int reg) { + __ pushq(CpuRegister(reg)); +} + + +void ParallelMoveResolverX86_64::RestoreScratch(int reg) { + __ popq(CpuRegister(reg)); } } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index ac7ee9ffc4..f07df292e0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "nodes.h" +#include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" namespace art { @@ -55,6 +56,27 @@ class InvokeDexCallingConventionVisitor { class CodeGeneratorX86_64; +class ParallelMoveResolverX86_64 : public ParallelMoveResolver { + public: + ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + 
virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + X86_64Assembler* GetAssembler() const; + + private: + void Exchange(CpuRegister reg, int mem); + void Exchange(int mem1, int mem2); + + CodeGeneratorX86_64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64); +}; + class LocationsBuilderX86_64 : public HGraphVisitor { public: LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) @@ -123,6 +145,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { return &assembler_; } + ParallelMoveResolverX86_64* GetMoveResolver() { + return &move_resolver_; + } + int32_t GetStackSlot(HLocal* local) const; virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -150,6 +176,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { LocationsBuilderX86_64 location_builder_; InstructionCodeGeneratorX86_64 instruction_visitor_; + ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 8b7c4f1ff1..e63122ffed 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -65,7 +65,7 @@ class RegisterAllocator { static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); static bool Supports(InstructionSet instruction_set) { - return instruction_set == kX86 || instruction_set == kArm; + return instruction_set == kX86 || instruction_set == kArm || instruction_set == kX86_64; } size_t GetNumberOfSpillSlots() const { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index b07eed390f..41d1529ef5 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -138,8 +138,8 @@ void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); - EmitUint8(0x89); - EmitRegisterOperand(src.LowBits(), dst.LowBits()); + EmitUint8(0x8B); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); } @@ -821,6 +821,15 @@ void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) { EmitRegisterOperand(dst.LowBits(), src.LowBits()); } + +void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(dst, src); + EmitUint8(0x87); + EmitOperand(dst.LowBits(), Operand(src)); +} + + void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg, address); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 6276603757..9aa5a54df4 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -375,6 +375,7 @@ class X86_64Assembler FINAL : public Assembler { void fptan(); void xchgl(CpuRegister dst, CpuRegister src); + void xchgq(CpuRegister dst, CpuRegister src); void xchgl(CpuRegister reg, const Address& address); void cmpl(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 799db9f5bd..f7bad8b057 100644 --- 
a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -125,6 +125,16 @@ TEST_F(AssemblerX86_64Test, XorqImm) { DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi"); } +TEST_F(AssemblerX86_64Test, Movl) { + GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11)); + const char* expected = + "movl %R11d, %R8d\n" + "movl %R11d, %EAX\n"; + + DriverStr(expected, "movl"); +} + std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) { // From Condition diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 58a0379fa6..ca9eae31e9 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -30,6 +30,7 @@ namespace x86_64 { class CpuRegister { public: explicit CpuRegister(Register r) : reg_(r) {} + explicit CpuRegister(int r) : reg_(Register(r)) {} Register AsRegister() const { return reg_; } diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index e2943d39b6..c3f20828cc 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1093,7 +1093,9 @@ static int dex2oat(int argc, char** argv) { } if (compiler_filter_string == nullptr) { - if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) { + if ((instruction_set == kX86_64 && image) || + instruction_set == kArm64 || + instruction_set == kMips) { // TODO: implement/fix compilers for these architectures. compiler_filter_string = "interpret-only"; } else if (image) { diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 6a337b3038..96ffc9310f 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -25,14 +25,14 @@ namespace art { namespace arm { -static const uint32_t gZero = 0; +static constexpr uint32_t gZero = 0; void ArmContext::Reset() { for (size_t i = 0; i < kNumberOfCoreRegisters; i++) { - gprs_[i] = NULL; + gprs_[i] = nullptr; } for (size_t i = 0; i < kNumberOfSRegisters; i++) { - fprs_[i] = NULL; + fprs_[i] = nullptr; } gprs_[SP] = &sp_; gprs_[PC] = &pc_; @@ -69,31 +69,46 @@ void ArmContext::FillCalleeSaves(const StackVisitor& fr) { } } -void ArmContext::SetGPR(uint32_t reg, uintptr_t value) { +bool ArmContext::SetGPR(uint32_t reg, uintptr_t value) { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. - DCHECK(gprs_[reg] != NULL); - *gprs_[reg] = value; + if (gprs_[reg] != nullptr) { + *gprs_[reg] = value; + return true; + } else { + return false; + } +} + +bool ArmContext::SetFPR(uint32_t reg, uintptr_t value) { + DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters)); + DCHECK_NE(fprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. + if (fprs_[reg] != nullptr) { + *fprs_[reg] = value; + return true; + } else { + return false; + } } void ArmContext::SmashCallerSaves() { // This needs to be 0 because we want a null/zero return value. 
gprs_[R0] = const_cast<uint32_t*>(&gZero); gprs_[R1] = const_cast<uint32_t*>(&gZero); - gprs_[R2] = NULL; - gprs_[R3] = NULL; + gprs_[R2] = nullptr; + gprs_[R3] = nullptr; } extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*); void ArmContext::DoLongJump() { - uintptr_t gprs[16]; - uint32_t fprs[32]; + uintptr_t gprs[kNumberOfCoreRegisters]; + uint32_t fprs[kNumberOfSRegisters]; for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) { - gprs[i] = gprs_[i] != NULL ? *gprs_[i] : ArmContext::kBadGprBase + i; + gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : ArmContext::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfSRegisters; ++i) { - fprs[i] = fprs_[i] != NULL ? *fprs_[i] : ArmContext::kBadGprBase + i; + fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : ArmContext::kBadFprBase + i; } DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]); art_quick_do_long_jump(gprs, fprs); diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 2ccce8dcaf..e894f169d3 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -32,31 +32,53 @@ class ArmContext : public Context { virtual ~ArmContext() {} - virtual void Reset(); + void Reset() OVERRIDE; - virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - virtual void SetSP(uintptr_t new_sp) { - SetGPR(SP, new_sp); + void SetSP(uintptr_t new_sp) OVERRIDE { + bool success = SetGPR(SP, new_sp); + CHECK(success) << "Failed to set SP register"; } - virtual void SetPC(uintptr_t new_pc) { - SetGPR(PC, new_pc); + void SetPC(uintptr_t new_pc) OVERRIDE { + bool success = SetGPR(PC, new_pc); + CHECK(success) << "Failed to set PC register"; } - virtual uintptr_t* GetGPRAddress(uint32_t reg) { + uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg]; } - virtual uintptr_t GetGPR(uint32_t reg) { + bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); - return *gprs_[reg]; + if (gprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *gprs_[reg]; + return true; + } } - virtual void SetGPR(uint32_t reg, uintptr_t value); - virtual void SmashCallerSaves(); - virtual void DoLongJump(); + bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; + + bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE { + DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters)); + if (fprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *fprs_[reg]; + return true; + } + } + + bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE; + + void SmashCallerSaves() OVERRIDE; + void DoLongJump() OVERRIDE; private: // Pointers to register locations, initialized to NULL or the specific registers below. 
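
Note on the Context interface change running through the hunks above and below: GetGPR()/SetGPR() now report whether the register actually has a saved location in the frame instead of blindly dereferencing it, and matching GetFPR()/SetFPR() accessors are added. A minimal sketch of how a caller is expected to use the new contract; 'context' and 'reg' are placeholder names for this illustration only, not part of the patch:

    // Hypothetical caller, assuming an art::Context* 'context' and a register index 'reg'.
    uintptr_t value = 0;
    if (context->GetGPR(reg, &value)) {
      // The register was saved in this frame; 'value' holds its contents.
    } else {
      // No saved location: report the absence (the debugger uses
      // JDWP::ERR_ABSENT_INFORMATION for this, see the debugger.cc hunks below)
      // rather than crashing on a null slot.
    }
    // Writes that must never fail are CHECKed, mirroring the SetSP()/SetPC() wrappers above.
    CHECK(context->SetGPR(reg, value)) << "Failed to set register " << reg;
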
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc index 09e8b59e3b..3eb92c8556 100644 --- a/runtime/arch/arm64/context_arm64.cc +++ b/runtime/arch/arm64/context_arm64.cc @@ -28,14 +28,14 @@ namespace art { namespace arm64 { -static const uint64_t gZero = 0; +static constexpr uint64_t gZero = 0; void Arm64Context::Reset() { for (size_t i = 0; i < kNumberOfCoreRegisters; i++) { - gprs_[i] = NULL; + gprs_[i] = nullptr; } for (size_t i = 0; i < kNumberOfDRegisters; i++) { - fprs_[i] = NULL; + fprs_[i] = nullptr; } gprs_[SP] = &sp_; gprs_[LR] = &pc_; @@ -73,73 +73,88 @@ void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { } } -void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { +bool Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. - DCHECK(gprs_[reg] != NULL); - *gprs_[reg] = value; + if (gprs_[reg] != nullptr) { + *gprs_[reg] = value; + return true; + } else { + return false; + } +} + +bool Arm64Context::SetFPR(uint32_t reg, uintptr_t value) { + DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters)); + DCHECK_NE(fprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. + if (fprs_[reg] != nullptr) { + *fprs_[reg] = value; + return true; + } else { + return false; + } } void Arm64Context::SmashCallerSaves() { // This needs to be 0 because we want a null/zero return value. gprs_[X0] = const_cast<uint64_t*>(&gZero); - gprs_[X1] = NULL; - gprs_[X2] = NULL; - gprs_[X3] = NULL; - gprs_[X4] = NULL; - gprs_[X5] = NULL; - gprs_[X6] = NULL; - gprs_[X7] = NULL; - gprs_[X8] = NULL; - gprs_[X9] = NULL; - gprs_[X10] = NULL; - gprs_[X11] = NULL; - gprs_[X12] = NULL; - gprs_[X13] = NULL; - gprs_[X14] = NULL; - gprs_[X15] = NULL; + gprs_[X1] = nullptr; + gprs_[X2] = nullptr; + gprs_[X3] = nullptr; + gprs_[X4] = nullptr; + gprs_[X5] = nullptr; + gprs_[X6] = nullptr; + gprs_[X7] = nullptr; + gprs_[X8] = nullptr; + gprs_[X9] = nullptr; + gprs_[X10] = nullptr; + gprs_[X11] = nullptr; + gprs_[X12] = nullptr; + gprs_[X13] = nullptr; + gprs_[X14] = nullptr; + gprs_[X15] = nullptr; // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved. 
- fprs_[D0] = NULL; - fprs_[D1] = NULL; - fprs_[D2] = NULL; - fprs_[D3] = NULL; - fprs_[D4] = NULL; - fprs_[D5] = NULL; - fprs_[D6] = NULL; - fprs_[D7] = NULL; - - fprs_[D16] = NULL; - fprs_[D17] = NULL; - fprs_[D18] = NULL; - fprs_[D19] = NULL; - fprs_[D20] = NULL; - fprs_[D21] = NULL; - fprs_[D22] = NULL; - fprs_[D23] = NULL; - fprs_[D24] = NULL; - fprs_[D25] = NULL; - fprs_[D26] = NULL; - fprs_[D27] = NULL; - fprs_[D28] = NULL; - fprs_[D29] = NULL; - fprs_[D30] = NULL; - fprs_[D31] = NULL; + fprs_[D0] = nullptr; + fprs_[D1] = nullptr; + fprs_[D2] = nullptr; + fprs_[D3] = nullptr; + fprs_[D4] = nullptr; + fprs_[D5] = nullptr; + fprs_[D6] = nullptr; + fprs_[D7] = nullptr; + + fprs_[D16] = nullptr; + fprs_[D17] = nullptr; + fprs_[D18] = nullptr; + fprs_[D19] = nullptr; + fprs_[D20] = nullptr; + fprs_[D21] = nullptr; + fprs_[D22] = nullptr; + fprs_[D23] = nullptr; + fprs_[D24] = nullptr; + fprs_[D25] = nullptr; + fprs_[D26] = nullptr; + fprs_[D27] = nullptr; + fprs_[D28] = nullptr; + fprs_[D29] = nullptr; + fprs_[D30] = nullptr; + fprs_[D31] = nullptr; } extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*); void Arm64Context::DoLongJump() { uint64_t gprs[32]; - uint64_t fprs[32]; + uint64_t fprs[kNumberOfDRegisters]; // Do not use kNumberOfCoreRegisters, as this is with the distinction of SP and XZR for (size_t i = 0; i < 32; ++i) { - gprs[i] = gprs_[i] != NULL ? *gprs_[i] : Arm64Context::kBadGprBase + i; + gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfDRegisters; ++i) { - fprs[i] = fprs_[i] != NULL ? *fprs_[i] : Arm64Context::kBadGprBase + i; + fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : Arm64Context::kBadGprBase + i; } DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]); art_quick_do_long_jump(gprs, fprs); diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h index d40e291a69..1f69869099 100644 --- a/runtime/arch/arm64/context_arm64.h +++ b/runtime/arch/arm64/context_arm64.h @@ -32,31 +32,53 @@ class Arm64Context : public Context { ~Arm64Context() {} - void Reset(); + void Reset() OVERRIDE; - void FillCalleeSaves(const StackVisitor& fr); + void FillCalleeSaves(const StackVisitor& fr) OVERRIDE; - void SetSP(uintptr_t new_sp) { - SetGPR(SP, new_sp); + void SetSP(uintptr_t new_sp) OVERRIDE { + bool success = SetGPR(SP, new_sp); + CHECK(success) << "Failed to set SP register"; } - void SetPC(uintptr_t new_lr) { - SetGPR(LR, new_lr); + void SetPC(uintptr_t new_lr) OVERRIDE { + bool success = SetGPR(LR, new_lr); + CHECK(success) << "Failed to set LR register"; } - virtual uintptr_t* GetGPRAddress(uint32_t reg) { + uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg]; } - uintptr_t GetGPR(uint32_t reg) { + bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); - return *gprs_[reg]; + if (gprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *gprs_[reg]; + return true; + } } - void SetGPR(uint32_t reg, uintptr_t value); - void SmashCallerSaves(); - void DoLongJump(); + bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; + + bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE { + DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters)); + if (fprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *fprs_[reg]; + return true; + } + } + + bool SetFPR(uint32_t reg, uintptr_t value) 
OVERRIDE; + + void SmashCallerSaves() OVERRIDE; + void DoLongJump() OVERRIDE; private: // Pointers to register locations, initialized to NULL or the specific registers below. diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 84ee7782f5..cbb2c27f60 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -72,17 +72,14 @@ extern "C" void art_quick_handle_fill_data(void*, void*); extern "C" void art_quick_lock_object(void*); extern "C" void art_quick_unlock_object(void*); -// Math entrypoints. -extern int32_t CmpgDouble(double a, double b); -extern int32_t CmplDouble(double a, double b); -extern int32_t CmpgFloat(float a, float b); -extern int32_t CmplFloat(float a, float b); - // Single-precision FP arithmetics. -extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] +extern "C" float art_quick_fmodf(float a, float b); // REM_FLOAT[_2ADDR] // Double-precision FP arithmetics. -extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] +extern "C" double art_quick_fmod(double a, double b); // REM_DOUBLE[_2ADDR] + +// Memcpy +extern "C" void* art_quick_memcpy(void* __restrict, const void* __restrict, size_t); // Intrinsic entrypoints. extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t); @@ -175,31 +172,31 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pUnlockObject = art_quick_unlock_object; // Math - // TODO NULL entrypoints not needed for ARM64 - generate inline. - qpoints->pCmpgDouble = CmpgDouble; - qpoints->pCmpgFloat = CmpgFloat; - qpoints->pCmplDouble = CmplDouble; - qpoints->pCmplFloat = CmplFloat; - qpoints->pFmod = fmod; - qpoints->pL2d = NULL; - qpoints->pFmodf = fmodf; - qpoints->pL2f = NULL; - qpoints->pD2iz = NULL; - qpoints->pF2iz = NULL; - qpoints->pIdivmod = NULL; - qpoints->pD2l = NULL; - qpoints->pF2l = NULL; - qpoints->pLdiv = NULL; - qpoints->pLmod = NULL; - qpoints->pLmul = NULL; - qpoints->pShlLong = NULL; - qpoints->pShrLong = NULL; - qpoints->pUshrLong = NULL; + // TODO nullptr entrypoints not needed for ARM64 - generate inline. + qpoints->pCmpgDouble = nullptr; + qpoints->pCmpgFloat = nullptr; + qpoints->pCmplDouble = nullptr; + qpoints->pCmplFloat = nullptr; + qpoints->pFmod = art_quick_fmod; + qpoints->pL2d = nullptr; + qpoints->pFmodf = art_quick_fmodf; + qpoints->pL2f = nullptr; + qpoints->pD2iz = nullptr; + qpoints->pF2iz = nullptr; + qpoints->pIdivmod = nullptr; + qpoints->pD2l = nullptr; + qpoints->pF2l = nullptr; + qpoints->pLdiv = nullptr; + qpoints->pLmod = nullptr; + qpoints->pLmul = nullptr; + qpoints->pShlLong = nullptr; + qpoints->pShrLong = nullptr; + qpoints->pUshrLong = nullptr; // Intrinsics qpoints->pIndexOf = art_quick_indexof; qpoints->pStringCompareTo = art_quick_string_compareto; - qpoints->pMemcpy = memcpy; + qpoints->pMemcpy = art_quick_memcpy; // Invocation qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index d70478837f..4ede453a15 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1862,3 +1862,22 @@ ENTRY art_quick_string_compareto csel x0, x0, x1, ne // x0 := x0 != 0 ? x0 : x1 ret END art_quick_string_compareto + +// Macro to facilitate adding new entrypoints which call to native function directly. 
+// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS. +// But we might introduce more differences. +.macro NATIVE_DOWNCALL name, entrypoint + .extern \entrypoint +ENTRY \name + sub sp, sp, #16 + stp xSELF, xLR, [sp] + bl \entrypoint + ldp xSELF, xLR, [sp] + add sp, sp, #16 + ret +END \name +.endm + +NATIVE_DOWNCALL art_quick_fmod fmod +NATIVE_DOWNCALL art_quick_fmodf fmodf +NATIVE_DOWNCALL art_quick_memcpy memcpy diff --git a/runtime/arch/context.h b/runtime/arch/context.h index f7b7835466..20a84dd902 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -38,30 +38,40 @@ class Context { // Re-initializes the registers for context re-use. virtual void Reset() = 0; - // Read values from callee saves in the given frame. The frame also holds + // Reads values from callee saves in the given frame. The frame also holds // the method that holds the layout. virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; - // Set the stack pointer value + // Sets the stack pointer value. virtual void SetSP(uintptr_t new_sp) = 0; - // Set the program counter value + // Sets the program counter value. virtual void SetPC(uintptr_t new_pc) = 0; // Gets the given GPRs address. virtual uintptr_t* GetGPRAddress(uint32_t reg) = 0; - // Read the given GPR - virtual uintptr_t GetGPR(uint32_t reg) = 0; + // Reads the given GPR. Returns true if we successfully read the register and + // set its value into 'val', returns false otherwise. + virtual bool GetGPR(uint32_t reg, uintptr_t* val) = 0; - // Set the given GPR. - virtual void SetGPR(uint32_t reg, uintptr_t value) = 0; + // Sets the given GPR. Returns true if we successfully write the given value + // into the register, returns false otherwise. + virtual bool SetGPR(uint32_t reg, uintptr_t value) = 0; - // Smash the caller save registers. If we're throwing, we don't want to return bogus values. + // Reads the given FPR. Returns true if we successfully read the register and + // set its value into 'val', returns false otherwise. + virtual bool GetFPR(uint32_t reg, uintptr_t* val) = 0; + + // Sets the given FPR. Returns true if we successfully write the given value + // into the register, returns false otherwise. + virtual bool SetFPR(uint32_t reg, uintptr_t value) = 0; + + // Smashes the caller save registers. If we're throwing, we don't want to return bogus values. 
virtual void SmashCallerSaves() = 0; - // Switch execution of the executing context to this context + // Switches execution of the executing context to this context virtual void DoLongJump() = 0; protected: diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index ad2889135a..789dbbb6d7 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -24,14 +24,14 @@ namespace art { namespace mips { -static const uint32_t gZero = 0; +static constexpr uint32_t gZero = 0; void MipsContext::Reset() { for (size_t i = 0; i < kNumberOfCoreRegisters; i++) { - gprs_[i] = NULL; + gprs_[i] = nullptr; } for (size_t i = 0; i < kNumberOfFRegisters; i++) { - fprs_[i] = NULL; + fprs_[i] = nullptr; } gprs_[SP] = &sp_; gprs_[RA] = &ra_; @@ -68,20 +68,35 @@ void MipsContext::FillCalleeSaves(const StackVisitor& fr) { } } -void MipsContext::SetGPR(uint32_t reg, uintptr_t value) { +bool MipsContext::SetGPR(uint32_t reg, uintptr_t value) { CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); CHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. - CHECK(gprs_[reg] != NULL); - *gprs_[reg] = value; + if (gprs_[reg] != nullptr) { + *gprs_[reg] = value; + return true; + } else { + return false; + } +} + +bool MipsContext::SetFPR(uint32_t reg, uintptr_t value) { + CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters)); + CHECK_NE(fprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. + if (fprs_[reg] != nullptr) { + *fprs_[reg] = value; + return true; + } else { + return false; + } } void MipsContext::SmashCallerSaves() { // This needs to be 0 because we want a null/zero return value. gprs_[V0] = const_cast<uint32_t*>(&gZero); gprs_[V1] = const_cast<uint32_t*>(&gZero); - gprs_[A1] = NULL; - gprs_[A2] = NULL; - gprs_[A3] = NULL; + gprs_[A1] = nullptr; + gprs_[A2] = nullptr; + gprs_[A3] = nullptr; } extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*); @@ -90,10 +105,10 @@ void MipsContext::DoLongJump() { uintptr_t gprs[kNumberOfCoreRegisters]; uint32_t fprs[kNumberOfFRegisters]; for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) { - gprs[i] = gprs_[i] != NULL ? *gprs_[i] : MipsContext::kBadGprBase + i; + gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : MipsContext::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfFRegisters; ++i) { - fprs[i] = fprs_[i] != NULL ? *fprs_[i] : MipsContext::kBadGprBase + i; + fprs[i] = fprs_[i] != nullptr ? 
*fprs_[i] : MipsContext::kBadGprBase + i; } art_quick_do_long_jump(gprs, fprs); } diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h index d5f27aeeaa..f2ee335d48 100644 --- a/runtime/arch/mips/context_mips.h +++ b/runtime/arch/mips/context_mips.h @@ -31,31 +31,53 @@ class MipsContext : public Context { } virtual ~MipsContext() {} - virtual void Reset(); + void Reset() OVERRIDE; - virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - virtual void SetSP(uintptr_t new_sp) { - SetGPR(SP, new_sp); + void SetSP(uintptr_t new_sp) OVERRIDE { + bool success = SetGPR(SP, new_sp); + CHECK(success) << "Failed to set SP register"; } - virtual void SetPC(uintptr_t new_pc) { - SetGPR(RA, new_pc); + void SetPC(uintptr_t new_pc) OVERRIDE { + bool success = SetGPR(RA, new_pc); + CHECK(success) << "Failed to set RA register"; } - virtual uintptr_t* GetGPRAddress(uint32_t reg) { + uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg]; } - virtual uintptr_t GetGPR(uint32_t reg) { + bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE { CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); - return *gprs_[reg]; + if (gprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *gprs_[reg]; + return true; + } } - virtual void SetGPR(uint32_t reg, uintptr_t value); - virtual void SmashCallerSaves(); - virtual void DoLongJump(); + bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; + + bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE { + CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters)); + if (fprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *fprs_[reg]; + return true; + } + } + + bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE; + + void SmashCallerSaves() OVERRIDE; + void DoLongJump() OVERRIDE; private: // Pointers to registers in the stack, initialized to NULL except for the special cases below. diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 7785bc356e..22b8cca4d4 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1796,6 +1796,8 @@ extern "C" void art_quick_indexof(void); TEST_F(StubTest, StringIndexOf) { #if defined(__arm__) || defined(__aarch64__) + TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); + Thread* self = Thread::Current(); ScopedObjectAccess soa(self); // garbage is created during ClassLinker::Init diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 8c98d910c5..37049cfd7b 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -24,11 +24,11 @@ namespace art { namespace x86 { -static const uintptr_t gZero = 0; +static constexpr uintptr_t gZero = 0; void X86Context::Reset() { for (size_t i = 0; i < kNumberOfCpuRegisters; i++) { - gprs_[i] = NULL; + gprs_[i] = nullptr; } gprs_[ESP] = &esp_; // Initialize registers with easy to spot debug values. @@ -57,15 +57,19 @@ void X86Context::SmashCallerSaves() { // This needs to be 0 because we want a null/zero return value. 
gprs_[EAX] = const_cast<uintptr_t*>(&gZero); gprs_[EDX] = const_cast<uintptr_t*>(&gZero); - gprs_[ECX] = NULL; - gprs_[EBX] = NULL; + gprs_[ECX] = nullptr; + gprs_[EBX] = nullptr; } -void X86Context::SetGPR(uint32_t reg, uintptr_t value) { +bool X86Context::SetGPR(uint32_t reg, uintptr_t value) { CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); CHECK_NE(gprs_[reg], &gZero); - CHECK(gprs_[reg] != NULL); - *gprs_[reg] = value; + if (gprs_[reg] != nullptr) { + *gprs_[reg] = value; + return true; + } else { + return false; + } } void X86Context::DoLongJump() { @@ -74,7 +78,7 @@ void X86Context::DoLongJump() { // the top for the stack pointer that doesn't get popped in a pop-all. volatile uintptr_t gprs[kNumberOfCpuRegisters + 1]; for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { - gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86Context::kBadGprBase + i; + gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86Context::kBadGprBase + i; } // We want to load the stack pointer one slot below so that the ret will pop eip. uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - kWordSize; diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index 1c510265f9..a350b2500f 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -31,32 +31,49 @@ class X86Context : public Context { } virtual ~X86Context() {} - virtual void Reset(); + void Reset() OVERRIDE; - virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - virtual void SetSP(uintptr_t new_sp) { - SetGPR(ESP, new_sp); + void SetSP(uintptr_t new_sp) OVERRIDE { + bool success = SetGPR(ESP, new_sp); + CHECK(success) << "Failed to set ESP register"; } - virtual void SetPC(uintptr_t new_pc) { + void SetPC(uintptr_t new_pc) OVERRIDE { eip_ = new_pc; } - virtual uintptr_t* GetGPRAddress(uint32_t reg) { + uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg]; } - virtual uintptr_t GetGPR(uint32_t reg) { + bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); - return *gprs_[reg]; + if (gprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *gprs_[reg]; + return true; + } } - virtual void SetGPR(uint32_t reg, uintptr_t value); + bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; - virtual void SmashCallerSaves(); - virtual void DoLongJump(); + bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE { + LOG(FATAL) << "Floating-point registers are all caller save in X86"; + return false; + } + + bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE { + LOG(FATAL) << "Floating-point registers are all caller save in X86"; + return false; + } + + void SmashCallerSaves() OVERRIDE; + void DoLongJump() OVERRIDE; private: // Pointers to register locations, floating point registers are all caller save. Values are diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index c30dca186a..a85e2508f7 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -69,8 +69,6 @@ extern "C" void art_quick_lock_object(void*); extern "C" void art_quick_unlock_object(void*); // Math entrypoints. 
-extern "C" double art_quick_fmod(double, double); -extern "C" float art_quick_fmodf(float, float); extern "C" int64_t art_quick_d2l(double); extern "C" int64_t art_quick_f2l(float); extern "C" int64_t art_quick_ldiv(int64_t, int64_t); @@ -175,9 +173,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmpgFloat = NULL; // Not needed on x86. // points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. - qpoints->pFmod = art_quick_fmod; + // qpoints->pFmod = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. - qpoints->pFmodf = art_quick_fmodf; + // qpoints->pFmodf = NULL; // Not needed on x86. // qpoints->pL2f = NULL; // Not needed on x86. // points->pD2iz = NULL; // Not needed on x86. // points->pF2iz = NULL; // Not needed on x86. diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 28e4dd6ab7..ecd8ce68e1 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -734,35 +734,6 @@ END_FUNCTION art_quick_memcpy NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret -DEFINE_FUNCTION art_quick_fmod - subl LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - PUSH ebx // pass arg4 b.hi - PUSH edx // pass arg3 b.lo - PUSH ecx // pass arg2 a.hi - PUSH eax // pass arg1 a.lo - SETUP_GOT_NOSAVE // clobbers EBX - call PLT_SYMBOL(fmod) // (jdouble a, jdouble b) - fstpl (%esp) // pop return value off fp stack - movsd (%esp), %xmm0 // place into %xmm0 - addl LITERAL(28), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-28) - ret -END_FUNCTION art_quick_fmod - -DEFINE_FUNCTION art_quick_fmodf - PUSH eax // alignment padding - PUSH ecx // pass arg2 b - PUSH eax // pass arg1 a - SETUP_GOT_NOSAVE // clobbers EBX - call PLT_SYMBOL(fmodf) // (jfloat a, jfloat b) - fstps (%esp) // pop return value off fp stack - movss (%esp), %xmm0 // place into %xmm0 - addl LITERAL(12), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-12) - ret -END_FUNCTION art_quick_fmodf - DEFINE_FUNCTION art_quick_d2l PUSH eax // alignment padding PUSH ecx // pass arg2 a.hi diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 810ef9455a..0ccbd279f5 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -24,7 +24,7 @@ namespace art { namespace x86_64 { -static const uintptr_t gZero = 0; +static constexpr uintptr_t gZero = 0; void X86_64Context::Reset() { for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { @@ -80,11 +80,26 @@ void X86_64Context::SmashCallerSaves() { gprs_[R11] = nullptr; } -void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { +bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); CHECK_NE(gprs_[reg], &gZero); - CHECK(gprs_[reg] != NULL); - *gprs_[reg] = value; + if (gprs_[reg] != nullptr) { + *gprs_[reg] = value; + return true; + } else { + return false; + } +} + +bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) { + CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters)); + CHECK_NE(fprs_[reg], &gZero); + if (fprs_[reg] != nullptr) { + *fprs_[reg] = value; + return true; + } else { + return false; + } } void X86_64Context::DoLongJump() { @@ -93,7 +108,7 @@ void X86_64Context::DoLongJump() { // the top for the stack pointer that doesn't get popped in a pop-all. 
volatile uintptr_t gprs[kNumberOfCpuRegisters + 1]; for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { - gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86_64Context::kBadGprBase + i; + gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i; } // We want to load the stack pointer one slot below so that the ret will pop eip. uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize; diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 055df618fb..902c3b9876 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -31,32 +31,52 @@ class X86_64Context : public Context { } virtual ~X86_64Context() {} - virtual void Reset(); + void Reset() OVERRIDE; - virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - virtual void SetSP(uintptr_t new_sp) { - SetGPR(RSP, new_sp); + void SetSP(uintptr_t new_sp) OVERRIDE { + bool success = SetGPR(RSP, new_sp); + CHECK(success) << "Failed to set RSP register"; } - virtual void SetPC(uintptr_t new_pc) { + void SetPC(uintptr_t new_pc) OVERRIDE { rip_ = new_pc; } - virtual uintptr_t* GetGPRAddress(uint32_t reg) { + uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg]; } - virtual uintptr_t GetGPR(uint32_t reg) { + bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); - return *gprs_[reg]; + if (gprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *gprs_[reg]; + return true; + } } - virtual void SetGPR(uint32_t reg, uintptr_t value); + bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; - virtual void SmashCallerSaves(); - virtual void DoLongJump(); + bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE { + DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters)); + if (fprs_[reg] == nullptr) { + return false; + } else { + DCHECK(val != nullptr); + *val = *fprs_[reg]; + return true; + } + } + + bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE; + + void SmashCallerSaves() OVERRIDE; + void DoLongJump() OVERRIDE; private: // Pointers to register locations. Values are initialized to NULL or the special registers below. diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 2612417a51..92aabeeb14 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -174,9 +174,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmpgFloat = NULL; // Not needed on x86. // points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. - qpoints->pFmod = fmod; + // qpoints->pFmod = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. - qpoints->pFmodf = fmodf; + // qpoints->pFmodf = NULL; // Not needed on x86. // qpoints->pL2f = NULL; // Not needed on x86. // points->pD2iz = NULL; // Not needed on x86. // points->pF2iz = NULL; // Not needed on x86. 
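
The FPR accessors added to each context above follow the same contract as the GPR ones: they return false when the frame recorded no location for the register. The one architecture-specific wrinkle is x86, where X86Context makes GetFPR()/SetFPR() fatal because every floating-point register is caller-save there and is never saved in a frame. An illustrative sketch only, with hypothetical names, of how architecture-neutral code can probe a callee-saved FPR:

    // 'context' is an art::Context*; 'reg' indexes an FPR on the target architecture.
    uintptr_t raw_bits = 0;  // FPR contents are handed back as a raw bit pattern.
    if (context->GetFPR(reg, &raw_bits)) {
      LOG(INFO) << "fpr" << reg << " = " << raw_bits;
    } else {
      LOG(INFO) << "fpr" << reg << " was not saved in this frame";
    }
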
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h index a7d51e2078..1e12be8c3e 100644 --- a/runtime/base/histogram.h +++ b/runtime/base/histogram.h @@ -71,6 +71,10 @@ template <class Value> class Histogram { return sum_; } + Value AdjustedSum() const { + return sum_ * kAdjust; + } + Value Min() const { return min_value_added_; } diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 7385382359..d684a50731 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -1137,8 +1137,10 @@ void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) { MoveImageClassesToClassTable(); } WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); - for (const std::pair<size_t, mirror::Class*>& it : class_table_) { - if (!visitor(it.second, arg)) { + for (std::pair<const size_t, mirror::Class*>& it : class_table_) { + mirror::Class** root = &it.second; + mirror::Class* c = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root); + if (!visitor(c, arg)) { return; } } @@ -2353,7 +2355,8 @@ bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash; ++it) { - mirror::Class* klass = it->second; + mirror::Class** root = &it->second; + mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root); if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) { class_table_.erase(it); return true; @@ -2397,12 +2400,14 @@ mirror::Class* ClassLinker::LookupClassFromTableLocked(const char* descriptor, size_t hash) { auto end = class_table_.end(); for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) { - mirror::Class* klass = it->second; + mirror::Class** root = &it->second; + mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root); if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) { if (kIsDebugBuild) { // Check for duplicates in the table. 
for (++it; it != end && it->first == hash; ++it) { - mirror::Class* klass2 = it->second; + mirror::Class** root2 = &it->second; + mirror::Class* klass2 = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root2); CHECK(!(klass2->GetClassLoader() == class_loader && klass2->DescriptorEquals(descriptor))) << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " " @@ -2494,7 +2499,8 @@ void ClassLinker::LookupClasses(const char* descriptor, std::vector<mirror::Clas ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash; ++it) { - mirror::Class* klass = it->second; + mirror::Class** root = &it->second; + mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root); if (klass->DescriptorEquals(descriptor)) { result.push_back(klass); } @@ -4362,8 +4368,10 @@ void ClassLinker::DumpAllClasses(int flags) { std::vector<mirror::Class*> all_classes; { ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); - for (const std::pair<size_t, mirror::Class*>& it : class_table_) { - all_classes.push_back(it.second); + for (std::pair<const size_t, mirror::Class*>& it : class_table_) { + mirror::Class** root = &it.second; + mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root); + all_classes.push_back(klass); } } diff --git a/runtime/class_linker.h b/runtime/class_linker.h index a1d7bc6bd5..6d96aa2637 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -573,6 +573,8 @@ class ClassLinker { // mirror::Class* instances. Results should be compared for a matching // Class::descriptor_ and Class::class_loader_. typedef std::multimap<size_t, mirror::Class*> Table; + // This contains strong roots. To enable concurrent root scanning of + // the class table, be careful to use a read barrier when accessing this. 
Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_); std::vector<std::pair<size_t, mirror::Class*>> new_class_roots_; diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 349700a1b8..f19c353f18 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -903,7 +903,7 @@ JDWP::JdwpError Dbg::GetOwnedMonitors(JDWP::ObjectId thread_id, std::vector<uint32_t>& stack_depths) { struct OwnedMonitorVisitor : public StackVisitor { OwnedMonitorVisitor(Thread* thread, Context* context, - std::vector<mirror::Object*>* monitor_vector, + std::vector<JDWP::ObjectId>* monitor_vector, std::vector<uint32_t>* stack_depth_vector) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : StackVisitor(thread, context), current_stack_depth(0), @@ -919,23 +919,22 @@ JDWP::JdwpError Dbg::GetOwnedMonitors(JDWP::ObjectId thread_id, return true; } - static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg) { + static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { OwnedMonitorVisitor* visitor = reinterpret_cast<OwnedMonitorVisitor*>(arg); - visitor->monitors->push_back(owned_monitor); + visitor->monitors->push_back(gRegistry->Add(owned_monitor)); visitor->stack_depths->push_back(visitor->current_stack_depth); } size_t current_stack_depth; - std::vector<mirror::Object*>* monitors; + std::vector<JDWP::ObjectId>* monitors; std::vector<uint32_t>* stack_depths; }; - std::vector<mirror::Object*> monitor_vector; - std::vector<uint32_t> stack_depth_vector; ScopedObjectAccessUnchecked soa(Thread::Current()); + Thread* thread; { MutexLock mu(soa.Self(), *Locks::thread_list_lock_); - Thread* thread; JDWP::JdwpError error = DecodeThread(soa, thread_id, thread); if (error != JDWP::ERR_NONE) { return error; @@ -943,18 +942,10 @@ JDWP::JdwpError Dbg::GetOwnedMonitors(JDWP::ObjectId thread_id, if (!IsSuspendedForDebugger(soa, thread)) { return JDWP::ERR_THREAD_NOT_SUSPENDED; } - std::unique_ptr<Context> context(Context::Create()); - OwnedMonitorVisitor visitor(thread, context.get(), &monitor_vector, &stack_depth_vector); - visitor.WalkStack(); - } - - // Add() requires the thread_list_lock_ not held to avoid the lock - // level violation. 
- for (size_t i = 0; i < monitor_vector.size(); ++i) { - monitors.push_back(gRegistry->Add(monitor_vector[i])); - stack_depths.push_back(stack_depth_vector[i]); } - + std::unique_ptr<Context> context(Context::Create()); + OwnedMonitorVisitor visitor(thread, context.get(), &monitors, &stack_depths); + visitor.WalkStack(); return JDWP::ERR_NONE; } @@ -2355,100 +2346,125 @@ JDWP::JdwpError Dbg::GetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame return false; } uint16_t reg = DemangleSlot(slot_, m); - + constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION; switch (tag_) { - case JDWP::JT_BOOLEAN: - { + case JDWP::JT_BOOLEAN: { CHECK_EQ(width_, 1U); - uint32_t intVal = GetVReg(m, reg, kIntVReg); - VLOG(jdwp) << "get boolean local " << reg << " = " << intVal; - JDWP::Set1(buf_+1, intVal != 0); + uint32_t intVal; + if (GetVReg(m, reg, kIntVReg, &intVal)) { + VLOG(jdwp) << "get boolean local " << reg << " = " << intVal; + JDWP::Set1(buf_+1, intVal != 0); + } else { + VLOG(jdwp) << "failed to get boolean local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - case JDWP::JT_BYTE: - { + case JDWP::JT_BYTE: { CHECK_EQ(width_, 1U); - uint32_t intVal = GetVReg(m, reg, kIntVReg); - VLOG(jdwp) << "get byte local " << reg << " = " << intVal; - JDWP::Set1(buf_+1, intVal); + uint32_t intVal; + if (GetVReg(m, reg, kIntVReg, &intVal)) { + VLOG(jdwp) << "get byte local " << reg << " = " << intVal; + JDWP::Set1(buf_+1, intVal); + } else { + VLOG(jdwp) << "failed to get byte local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - case JDWP::JT_SHORT: - case JDWP::JT_CHAR: - { + case JDWP::JT_SHORT: + case JDWP::JT_CHAR: { CHECK_EQ(width_, 2U); - uint32_t intVal = GetVReg(m, reg, kIntVReg); - VLOG(jdwp) << "get short/char local " << reg << " = " << intVal; - JDWP::Set2BE(buf_+1, intVal); + uint32_t intVal; + if (GetVReg(m, reg, kIntVReg, &intVal)) { + VLOG(jdwp) << "get short/char local " << reg << " = " << intVal; + JDWP::Set2BE(buf_+1, intVal); + } else { + VLOG(jdwp) << "failed to get short/char local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - case JDWP::JT_INT: - { + case JDWP::JT_INT: { CHECK_EQ(width_, 4U); - uint32_t intVal = GetVReg(m, reg, kIntVReg); - VLOG(jdwp) << "get int local " << reg << " = " << intVal; - JDWP::Set4BE(buf_+1, intVal); + uint32_t intVal; + if (GetVReg(m, reg, kIntVReg, &intVal)) { + VLOG(jdwp) << "get int local " << reg << " = " << intVal; + JDWP::Set4BE(buf_+1, intVal); + } else { + VLOG(jdwp) << "failed to get int local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - case JDWP::JT_FLOAT: - { + case JDWP::JT_FLOAT: { CHECK_EQ(width_, 4U); - uint32_t intVal = GetVReg(m, reg, kFloatVReg); - VLOG(jdwp) << "get int/float local " << reg << " = " << intVal; - JDWP::Set4BE(buf_+1, intVal); - } - break; - case JDWP::JT_ARRAY: - { - CHECK_EQ(width_, sizeof(JDWP::ObjectId)); - mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg)); - VLOG(jdwp) << "get array local " << reg << " = " << o; - if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) { - LOG(FATAL) << "Register " << reg << " expected to hold array: " << o; + uint32_t intVal; + if (GetVReg(m, reg, kFloatVReg, &intVal)) { + VLOG(jdwp) << "get float local " << reg << " = " << intVal; + JDWP::Set4BE(buf_+1, intVal); + } else { + VLOG(jdwp) << "failed to get float local " << reg; + error_ = kFailureErrorCode; } - JDWP::SetObjectId(buf_+1, gRegistry->Add(o)); + break; } - break; - case 
JDWP::JT_CLASS_LOADER: - case JDWP::JT_CLASS_OBJECT: - case JDWP::JT_OBJECT: - case JDWP::JT_STRING: - case JDWP::JT_THREAD: - case JDWP::JT_THREAD_GROUP: - { + case JDWP::JT_ARRAY: + case JDWP::JT_CLASS_LOADER: + case JDWP::JT_CLASS_OBJECT: + case JDWP::JT_OBJECT: + case JDWP::JT_STRING: + case JDWP::JT_THREAD: + case JDWP::JT_THREAD_GROUP: { CHECK_EQ(width_, sizeof(JDWP::ObjectId)); - mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg)); - VLOG(jdwp) << "get object local " << reg << " = " << o; - if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) { - LOG(FATAL) << "Register " << reg << " expected to hold object: " << o; + uint32_t intVal; + if (GetVReg(m, reg, kReferenceVReg, &intVal)) { + mirror::Object* o = reinterpret_cast<mirror::Object*>(intVal); + VLOG(jdwp) << "get " << tag_ << " object local " << reg << " = " << o; + if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) { + LOG(FATAL) << "Register " << reg << " expected to hold " << tag_ << " object: " << o; + } + tag_ = TagFromObject(soa_, o); + JDWP::SetObjectId(buf_+1, gRegistry->Add(o)); + } else { + VLOG(jdwp) << "failed to get " << tag_ << " object local " << reg; + error_ = kFailureErrorCode; } - tag_ = TagFromObject(soa_, o); - JDWP::SetObjectId(buf_+1, gRegistry->Add(o)); + break; } - break; - case JDWP::JT_DOUBLE: - { + case JDWP::JT_DOUBLE: { CHECK_EQ(width_, 8U); - uint32_t lo = GetVReg(m, reg, kDoubleLoVReg); - uint64_t hi = GetVReg(m, reg + 1, kDoubleHiVReg); - uint64_t longVal = (hi << 32) | lo; - VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal; - JDWP::Set8BE(buf_+1, longVal); + uint32_t lo; + uint32_t hi; + if (GetVReg(m, reg, kDoubleLoVReg, &lo) && GetVReg(m, reg + 1, kDoubleHiVReg, &hi)) { + uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo; + VLOG(jdwp) << "get double local " << reg << " = " + << hi << ":" << lo << " = " << longVal; + JDWP::Set8BE(buf_+1, longVal); + } else { + VLOG(jdwp) << "failed to get double local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - case JDWP::JT_LONG: - { + case JDWP::JT_LONG: { CHECK_EQ(width_, 8U); - uint32_t lo = GetVReg(m, reg, kLongLoVReg); - uint64_t hi = GetVReg(m, reg + 1, kLongHiVReg); - uint64_t longVal = (hi << 32) | lo; - VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal; - JDWP::Set8BE(buf_+1, longVal); + uint32_t lo; + uint32_t hi; + if (GetVReg(m, reg, kLongLoVReg, &lo) && GetVReg(m, reg + 1, kLongHiVReg, &hi)) { + uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo; + VLOG(jdwp) << "get long local " << reg << " = " + << hi << ":" << lo << " = " << longVal; + JDWP::Set8BE(buf_+1, longVal); + } else { + VLOG(jdwp) << "failed to get long local " << reg; + error_ = kFailureErrorCode; + } + break; } - break; - default: - LOG(FATAL) << "Unknown tag " << tag_; - break; + default: + LOG(FATAL) << "Unknown tag " << tag_; + break; } // Prepend tag, which may have been updated. 
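
A note on the 64-bit cases in the hunk above: the two vreg halves are now read into uint32_t locals, so the high half has to be widened before it is shifted into position; left-shifting a 32-bit value by 32 is undefined behavior in C++ and would drop the high word even when it appears to work. A standalone illustration with arbitrary values:

    uint32_t lo = 0xdeadbeef;
    uint32_t hi = 0x0000cafe;
    uint64_t long_val = (static_cast<uint64_t>(hi) << 32) | lo;  // 0x0000cafedeadbeef
    // Without the cast, 'hi << 32' would be evaluated at 32-bit width: undefined behavior.
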
@@ -2504,48 +2520,89 @@ JDWP::JdwpError Dbg::SetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame return false; } uint16_t reg = DemangleSlot(slot_, m); - + constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION; switch (tag_) { case JDWP::JT_BOOLEAN: case JDWP::JT_BYTE: CHECK_EQ(width_, 1U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg); + if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) { + VLOG(jdwp) << "failed to set boolean/byte local " << reg << " = " + << static_cast<uint32_t>(value_); + error_ = kFailureErrorCode; + } break; case JDWP::JT_SHORT: case JDWP::JT_CHAR: CHECK_EQ(width_, 2U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg); + if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) { + VLOG(jdwp) << "failed to set short/char local " << reg << " = " + << static_cast<uint32_t>(value_); + error_ = kFailureErrorCode; + } break; case JDWP::JT_INT: CHECK_EQ(width_, 4U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg); + if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) { + VLOG(jdwp) << "failed to set int local " << reg << " = " + << static_cast<uint32_t>(value_); + error_ = kFailureErrorCode; + } break; case JDWP::JT_FLOAT: CHECK_EQ(width_, 4U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg); + if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg)) { + VLOG(jdwp) << "failed to set float local " << reg << " = " + << static_cast<uint32_t>(value_); + error_ = kFailureErrorCode; + } break; case JDWP::JT_ARRAY: + case JDWP::JT_CLASS_LOADER: + case JDWP::JT_CLASS_OBJECT: case JDWP::JT_OBJECT: case JDWP::JT_STRING: - { + case JDWP::JT_THREAD: + case JDWP::JT_THREAD_GROUP: { CHECK_EQ(width_, sizeof(JDWP::ObjectId)); mirror::Object* o = gRegistry->Get<mirror::Object*>(static_cast<JDWP::ObjectId>(value_)); if (o == ObjectRegistry::kInvalidObject) { - UNIMPLEMENTED(FATAL) << "return an error code when given an invalid object to store"; + VLOG(jdwp) << tag_ << " object " << o << " is an invalid object"; + error_ = JDWP::ERR_INVALID_OBJECT; + } else if (!SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)), + kReferenceVReg)) { + VLOG(jdwp) << "failed to set " << tag_ << " object local " << reg << " = " << o; + error_ = kFailureErrorCode; } - SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)), kReferenceVReg); + break; } - break; - case JDWP::JT_DOUBLE: + case JDWP::JT_DOUBLE: { CHECK_EQ(width_, 8U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kDoubleLoVReg); - SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kDoubleHiVReg); + const uint32_t lo = static_cast<uint32_t>(value_); + const uint32_t hi = static_cast<uint32_t>(value_ >> 32); + bool success = SetVReg(m, reg, lo, kDoubleLoVReg); + success &= SetVReg(m, reg + 1, hi, kDoubleHiVReg); + if (!success) { + uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo; + VLOG(jdwp) << "failed to set double local " << reg << " = " + << hi << ":" << lo << " = " << longVal; + error_ = kFailureErrorCode; + } break; - case JDWP::JT_LONG: + } + case JDWP::JT_LONG: { CHECK_EQ(width_, 8U); - SetVReg(m, reg, static_cast<uint32_t>(value_), kLongLoVReg); - SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kLongHiVReg); + const uint32_t lo = static_cast<uint32_t>(value_); + const uint32_t hi = static_cast<uint32_t>(value_ >> 32); + bool success = SetVReg(m, reg, lo, kLongLoVReg); + success &= SetVReg(m, reg + 1, hi, kLongHiVReg); + if (!success) { + uint64_t longVal = 
(static_cast<uint64_t>(hi) << 32) | lo; + VLOG(jdwp) << "failed to set long local " << reg << " = " + << hi << ":" << lo << " = " << longVal; + error_ = kFailureErrorCode; + } break; + } default: LOG(FATAL) << "Unknown tag " << tag_; break; diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index 3000217697..291e2d0756 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -1493,6 +1493,10 @@ bool DexFileVerifier::CheckInterProtoIdItem() { DexFileParameterIterator it(*dex_file_, *item); while (it.HasNext() && *shorty != '\0') { + if (!CheckIndex(it.GetTypeIdx(), dex_file_->NumTypeIds(), + "inter_proto_id_item shorty type_idx")) { + return false; + } const char* descriptor = it.GetDescriptor(); if (!CheckShortyDescriptorMatch(*shorty, descriptor, false)) { return false; diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 16add0bee4..a17c36be6d 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -56,6 +56,7 @@ void GarbageCollector::ResetCumulativeStatistics() { } void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { + ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()).c_str()); Thread* self = Thread::Current(); uint64_t start_time = NanoTime(); timings_.Reset(); @@ -86,6 +87,7 @@ void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { for (uint64_t pause_time : pause_times_) { pause_histogram_.AddValue(pause_time / 1000); } + ATRACE_END(); } void GarbageCollector::SwapBitmaps() { diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index 02dd4d956e..f4f9dbb40a 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -105,7 +105,7 @@ class GarbageCollector { } uint64_t GetTotalPausedTimeNs() const { - return pause_histogram_.Sum(); + return pause_histogram_.AdjustedSum(); } int64_t GetTotalFreedBytes() const { diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 890036bc4a..c72913a401 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -176,7 +176,7 @@ void MarkSweep::ProcessReferences(Thread* self) { TimingLogger::ScopedSplit split("ProcessReferences", &timings_); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - true, &timings_, clear_soft_references_, &IsMarkedCallback, &MarkObjectCallback, + true, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this); } @@ -374,6 +374,10 @@ void MarkSweep::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr()); } +bool MarkSweep::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) { + return reinterpret_cast<MarkSweep*>(arg)->IsMarked(ref->AsMirrorPtr()); +} + class MarkSweepMarkObjectSlowPath { public: explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) { @@ -1170,11 +1174,11 @@ void MarkSweep::SweepLargeObjects(bool swap_bitmaps) { // Process the "referent" field in a java.lang.ref.Reference. If the referent has not yet been // marked, put it on the appropriate list in the heap for later processing.
void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref) { - DCHECK(klass != nullptr); if (kCountJavaLangRefs) { ++reference_count_; } - heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, IsMarkedCallback, this); + heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, &HeapReferenceMarkedCallback, + this); } class MarkObjectVisitor { @@ -1270,6 +1274,7 @@ void MarkSweep::ProcessMarkStack(bool paused) { inline bool MarkSweep::IsMarked(const Object* object) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { + DCHECK(object != nullptr); if (immune_region_.ContainsObject(object)) { return true; } diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index a0a0dd8ab4..a44d8a1f90 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -178,6 +178,10 @@ class MarkSweep : public GarbageCollector { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t thread_id, RootType root_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index e5bb1ccbe6..badf8b3501 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -164,7 +164,7 @@ void SemiSpace::ProcessReferences(Thread* self) { TimingLogger::ScopedSplit split("ProcessReferences", &timings_); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - false, &timings_, clear_soft_references_, &MarkedForwardingAddressCallback, + false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this); } @@ -649,6 +649,22 @@ void SemiSpace::MarkRoots() { Runtime::Current()->VisitRoots(MarkRootCallback, this); } +bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object, + void* arg) { + mirror::Object* obj = object->AsMirrorPtr(); + mirror::Object* new_obj = + reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(obj); + if (new_obj == nullptr) { + return false; + } + if (new_obj != obj) { + // Write barrier is not necessary since it still points to the same object, just at a different + // address. + object->Assign(new_obj); + } + return true; +} + mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) { return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object); } @@ -698,7 +714,7 @@ void SemiSpace::SweepLargeObjects(bool swap_bitmaps) { // marked, put it on the appropriate list in the heap for later processing. 
void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) { heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, - MarkedForwardingAddressCallback, this); + &HeapReferenceMarkedCallback, this); } class SemiSpaceMarkObjectVisitor { diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index a95abe440a..bff08478e0 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -162,6 +162,10 @@ class SemiSpace : public GarbageCollector { EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object, void* arg) + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) + SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index f5f7a868a3..e6a5380da1 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -285,17 +285,11 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table"; AddModUnionTable(mod_union_table); - if (collector::SemiSpace::kUseRememberedSet) { + if (collector::SemiSpace::kUseRememberedSet && non_moving_space_ != main_space_) { accounting::RememberedSet* non_moving_space_rem_set = new accounting::RememberedSet("Non-moving space remembered set", this, non_moving_space_); CHECK(non_moving_space_rem_set != nullptr) << "Failed to create non-moving space remembered set"; AddRememberedSet(non_moving_space_rem_set); - if (main_space_ != nullptr && main_space_ != non_moving_space_) { - accounting::RememberedSet* main_space_rem_set = - new accounting::RememberedSet("Main space remembered set", this, main_space_); - CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set"; - AddRememberedSet(main_space_rem_set); - } } // TODO: Count objects in the image space here. @@ -376,6 +370,9 @@ void Heap::CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t gr // that getting primitive array elements is faster. 
can_move_objects = !have_zygote_space_; } + if (collector::SemiSpace::kUseRememberedSet && main_space_ != nullptr) { + RemoveRememberedSet(main_space_); + } if (kUseRosAlloc) { rosalloc_space_ = space::RosAllocSpace::CreateFromMemMap( mem_map, "main rosalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity, @@ -390,6 +387,12 @@ void Heap::CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t gr CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space"; } main_space_->SetFootprintLimit(main_space_->Capacity()); + if (collector::SemiSpace::kUseRememberedSet) { + accounting::RememberedSet* main_space_rem_set = + new accounting::RememberedSet("Main space remembered set", this, main_space_); + CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set"; + AddRememberedSet(main_space_rem_set); + } VLOG(heap) << "Created main space " << main_space_; } @@ -1386,7 +1389,6 @@ void Heap::TransitionCollector(CollectorType collector_type) { Compact(bump_pointer_space_, main_space_); // Remove the main space so that we don't try to trim it, this doens't work for debug // builds since RosAlloc attempts to read the magic number from a protected page. - // TODO: Clean this up by getting rid of the remove_as_default parameter. RemoveSpace(main_space_); } break; @@ -1411,14 +1413,20 @@ void Heap::TransitionCollector(CollectorType collector_type) { ChangeCollector(collector_type); tl->ResumeAll(); // Can't call into java code with all threads suspended. - reference_processor_.EnqueueClearedReferences(); + reference_processor_.EnqueueClearedReferences(self); uint64_t duration = NanoTime() - start_time; GrowForUtilization(semi_space_collector_); FinishGC(self, collector::kGcTypeFull); int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent(); int32_t delta_allocated = before_allocated - after_allocated; + std::string saved_str; + if (delta_allocated >= 0) { + saved_str = " saved at least " + PrettySize(delta_allocated); + } else { + saved_str = " expanded " + PrettySize(-delta_allocated); + } LOG(INFO) << "Heap transition to " << process_state_ << " took " - << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated); + << PrettyDuration(duration) << saved_str; } void Heap::ChangeCollector(CollectorType collector_type) { @@ -1617,9 +1625,9 @@ void Heap::PreZygoteFork() { madvise(main_space_->Begin(), main_space_->Capacity(), MADV_DONTNEED); MemMap* mem_map = main_space_->ReleaseMemMap(); RemoveSpace(main_space_); - delete main_space_; - main_space_ = nullptr; + space::Space* old_main_space = main_space_; CreateMainMallocSpace(mem_map, kDefaultInitialSize, mem_map->Size(), mem_map->Size()); + delete old_main_space; AddSpace(main_space_); } else { bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); @@ -1808,13 +1816,12 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus CHECK(collector != nullptr) << "Could not find garbage collector with collector_type=" << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type; - ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str()); collector->Run(gc_cause, clear_soft_references || runtime->IsZygote()); total_objects_freed_ever_ += collector->GetFreedObjects(); total_bytes_freed_ever_ += collector->GetFreedBytes(); RequestHeapTrim(); // Enqueue cleared references. 
- reference_processor_.EnqueueClearedReferences(); + reference_processor_.EnqueueClearedReferences(self); // Grow the heap so that we know when to perform the next GC. GrowForUtilization(collector); const size_t duration = collector->GetDurationNs(); @@ -1840,7 +1847,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus << ((i != pause_times.size() - 1) ? "," : ""); } LOG(INFO) << gc_cause << " " << collector->GetName() - << " GC freed " << collector->GetFreedObjects() << "(" + << " GC freed " << collector->GetFreedObjects() << "(" << PrettySize(collector->GetFreedBytes()) << ") AllocSpace objects, " << collector->GetFreedLargeObjects() << "(" << PrettySize(collector->GetFreedLargeObjectBytes()) << ") LOS objects, " @@ -1850,8 +1857,6 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings()); } FinishGC(self, gc_type); - ATRACE_END(); - // Inform DDMS that a GC completed. Dbg::GcDidFinish(); return gc_type; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 7988af7f6b..3ff9889e3c 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -44,36 +44,35 @@ void ReferenceProcessor::DisableSlowPath(Thread* self) { mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) { mirror::Object* const referent = reference->GetReferent(); - if (LIKELY(!slow_path_enabled_)) { + // If the referent is null then it is already cleared, we can just return null since there is no + // scenario where it becomes non-null during the reference processing phase. + if (LIKELY(!slow_path_enabled_) || referent == nullptr) { return referent; } - // Another fast path, the referent is cleared, we can just return null since there is no scenario - // where it becomes non-null. - if (referent == nullptr) { - return nullptr; - } MutexLock mu(self, lock_); while (slow_path_enabled_) { - mirror::Object* const referent = reference->GetReferent(); - // If the referent became cleared, return it. - if (referent == nullptr) { + mirror::HeapReference<mirror::Object>* const referent_addr = + reference->GetReferentReferenceAddr(); + // If the referent became cleared, return it. Don't need barrier since thread roots can't get + // updated until after we leave the function due to holding the mutator lock. + if (referent_addr->AsMirrorPtr() == nullptr) { return nullptr; } // Try to see if the referent is already marked by using the is_marked_callback. We can return - // it to the mutator as long as the GC is not preserving references. If the GC is - IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_; + // it to the mutator as long as the GC is not preserving references. + IsHeapReferenceMarkedCallback* const is_marked_callback = + process_references_args_.is_marked_callback_; if (LIKELY(is_marked_callback != nullptr)) { - mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_); // If it's null it means not marked, but it could become marked if the referent is reachable // by finalizer referents. So we can not return in this case and must block. Otherwise, we // can return it to the mutator as long as the GC is not preserving references, in which // case only black nodes can be safely returned. 
If the GC is preserving references, the // mutator could take a white field from a grey or white node and move it somewhere else // in the heap causing corruption since this field would get swept. - if (obj != nullptr) { + if (is_marked_callback(referent_addr, process_references_args_.arg_)) { if (!preserving_references_ || (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) { - return obj; + return referent_addr->AsMirrorPtr(); } } } @@ -82,10 +81,14 @@ mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* return reference->GetReferent(); } -mirror::Object* ReferenceProcessor::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) { +bool ReferenceProcessor::PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj, + void* arg) { auto* const args = reinterpret_cast<ProcessReferencesArgs*>(arg); - // TODO: Not preserve all soft references. - return args->mark_callback_(obj, args->arg_); + // TODO: Add smarter logic for preserving soft references. + mirror::Object* new_obj = args->mark_callback_(obj->AsMirrorPtr(), args->arg_); + DCHECK(new_obj != nullptr); + obj->Assign(new_obj); + return true; } void ReferenceProcessor::StartPreservingReferences(Thread* self) { @@ -103,7 +106,7 @@ void ReferenceProcessor::StopPreservingReferences(Thread* self) { // Process reference class instances and schedule finalizations. void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references, - IsMarkedCallback* is_marked_callback, + IsHeapReferenceMarkedCallback* is_marked_callback, MarkObjectCallback* mark_object_callback, ProcessMarkStackCallback* process_mark_stack_callback, void* arg) { @@ -132,8 +135,8 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing } } // Clear all remaining soft and weak references with white referents. - soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg); - weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg); + soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); + weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); { TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" : "(Paused)EnqueueFinalizerReferences", timings); @@ -141,7 +144,7 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing StartPreservingReferences(self); } // Preserve all white objects with finalize methods and schedule them for finalization. - finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback, + finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, is_marked_callback, mark_object_callback, arg); process_mark_stack_callback(arg); if (concurrent) { @@ -149,10 +152,10 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing } } // Clear all finalizer referent reachable soft and weak references with white referents. - soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg); - weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg); + soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); + weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); // Clear all phantom references with white referents. 
- phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg); + phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); // At this point all reference queues other than the cleared references should be empty. DCHECK(soft_reference_queue_.IsEmpty()); DCHECK(weak_reference_queue_.IsEmpty()); @@ -176,39 +179,33 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing // Process the "referent" field in a java.lang.ref.Reference. If the referent has not yet been // marked, put it on the appropriate list in the heap for later processing. void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref, - IsMarkedCallback is_marked_callback, void* arg) { + IsHeapReferenceMarkedCallback* is_marked_callback, + void* arg) { // klass can be the class of the old object if the visitor already updated the class of ref. + DCHECK(klass != nullptr); DCHECK(klass->IsReferenceClass()); - mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>(); - if (referent != nullptr) { - mirror::Object* forward_address = is_marked_callback(referent, arg); - // Null means that the object is not currently marked. - if (forward_address == nullptr) { - Thread* self = Thread::Current(); - // TODO: Remove these locks, and use atomic stacks for storing references? - // We need to check that the references haven't already been enqueued since we can end up - // scanning the same reference multiple times due to dirty cards. - if (klass->IsSoftReferenceClass()) { - soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); - } else if (klass->IsWeakReferenceClass()) { - weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); - } else if (klass->IsFinalizerReferenceClass()) { - finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); - } else if (klass->IsPhantomReferenceClass()) { - phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); - } else { - LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex - << klass->GetAccessFlags(); - } - } else if (referent != forward_address) { - // Referent is already marked and we need to update it. - ref->SetReferent<false>(forward_address); + mirror::HeapReference<mirror::Object>* referent = ref->GetReferentReferenceAddr(); + if (referent->AsMirrorPtr() != nullptr && !is_marked_callback(referent, arg)) { + Thread* self = Thread::Current(); + // TODO: Remove these locks, and use atomic stacks for storing references? + // We need to check that the references haven't already been enqueued since we can end up + // scanning the same reference multiple times due to dirty cards. 
+ if (klass->IsSoftReferenceClass()) { + soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); + } else if (klass->IsWeakReferenceClass()) { + weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); + } else if (klass->IsFinalizerReferenceClass()) { + finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); + } else if (klass->IsPhantomReferenceClass()) { + phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref); + } else { + LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex + << klass->GetAccessFlags(); } } } -void ReferenceProcessor::EnqueueClearedReferences() { - Thread* self = Thread::Current(); +void ReferenceProcessor::EnqueueClearedReferences(Thread* self) { Locks::mutator_lock_->AssertNotHeld(self); if (!cleared_references_.IsEmpty()) { // When a runtime isn't started there are no reference queues to care about so ignore. diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h index f082a9ec66..ff7da52bdf 100644 --- a/runtime/gc/reference_processor.h +++ b/runtime/gc/reference_processor.h @@ -40,9 +40,10 @@ class Heap; class ReferenceProcessor { public: explicit ReferenceProcessor(); - static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg); + static bool PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references, - IsMarkedCallback* is_marked_callback, + IsHeapReferenceMarkedCallback* is_marked_callback, MarkObjectCallback* mark_object_callback, ProcessMarkStackCallback* process_mark_stack_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) @@ -54,21 +55,21 @@ class ReferenceProcessor { // Decode the referent, may block if references are being processed. mirror::Object* GetReferent(Thread* self, mirror::Reference* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); - void EnqueueClearedReferences() LOCKS_EXCLUDED(Locks::mutator_lock_); + void EnqueueClearedReferences(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_); void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref, - IsMarkedCallback is_marked_callback, void* arg) + IsHeapReferenceMarkedCallback* is_marked_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: class ProcessReferencesArgs { public: - ProcessReferencesArgs(IsMarkedCallback* is_marked_callback, + ProcessReferencesArgs(IsHeapReferenceMarkedCallback* is_marked_callback, MarkObjectCallback* mark_callback, void* arg) : is_marked_callback_(is_marked_callback), mark_callback_(mark_callback), arg_(arg) { } // The is marked callback is null when the args aren't set up. 
- IsMarkedCallback* is_marked_callback_; + IsHeapReferenceMarkedCallback* is_marked_callback_; MarkObjectCallback* mark_callback_; void* arg_; }; diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc index 3910c297a2..19476e62c2 100644 --- a/runtime/gc/reference_queue.cc +++ b/runtime/gc/reference_queue.cc @@ -26,8 +26,7 @@ namespace art { namespace gc { ReferenceQueue::ReferenceQueue() - : lock_("reference queue lock"), - list_(nullptr) { + : lock_("reference queue lock"), list_(nullptr) { } void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) { @@ -104,76 +103,61 @@ void ReferenceQueue::Dump(std::ostream& os) const { } } -void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, - IsMarkedCallback* preserve_callback, +void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references, + IsHeapReferenceMarkedCallback* preserve_callback, void* arg) { while (!IsEmpty()) { mirror::Reference* ref = DequeuePendingReference(); - mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>(); - if (referent != nullptr) { - mirror::Object* forward_address = preserve_callback(referent, arg); - if (forward_address == nullptr) { - // Referent is white, clear it. - if (Runtime::Current()->IsActiveTransaction()) { - ref->ClearReferent<true>(); - } else { - ref->ClearReferent<false>(); - } - if (ref->IsEnqueuable()) { - cleared_references.EnqueuePendingReference(ref); - } - } else if (referent != forward_address) { - // Object moved, need to updated the referent. - ref->SetReferent<false>(forward_address); + mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr(); + if (referent_addr->AsMirrorPtr() != nullptr && !preserve_callback(referent_addr, arg)) { + // Referent is white, clear it. + if (Runtime::Current()->IsActiveTransaction()) { + ref->ClearReferent<true>(); + } else { + ref->ClearReferent<false>(); + } + if (ref->IsEnqueuable()) { + cleared_references->EnqueuePendingReference(ref); } } } } -void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references, - IsMarkedCallback* is_marked_callback, +void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references, + IsHeapReferenceMarkedCallback* is_marked_callback, MarkObjectCallback* mark_object_callback, void* arg) { while (!IsEmpty()) { mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference(); - mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>(); - if (referent != nullptr) { - mirror::Object* forward_address = is_marked_callback(referent, arg); - // If the referent isn't marked, mark it and update the - if (forward_address == nullptr) { - forward_address = mark_object_callback(referent, arg); - // If the referent is non-null the reference must queuable. - DCHECK(ref->IsEnqueuable()); - // Move the updated referent to the zombie field. 
- if (Runtime::Current()->IsActiveTransaction()) { - ref->SetZombie<true>(forward_address); - ref->ClearReferent<true>(); - } else { - ref->SetZombie<false>(forward_address); - ref->ClearReferent<false>(); - } - cleared_references.EnqueueReference(ref); - } else if (referent != forward_address) { - ref->SetReferent<false>(forward_address); + mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr(); + if (referent_addr->AsMirrorPtr() != nullptr && !is_marked_callback(referent_addr, arg)) { + mirror::Object* forward_address = mark_object_callback(referent_addr->AsMirrorPtr(), arg); + // If the referent is non-null the reference must queuable. + DCHECK(ref->IsEnqueuable()); + // Move the updated referent to the zombie field. + if (Runtime::Current()->IsActiveTransaction()) { + ref->SetZombie<true>(forward_address); + ref->ClearReferent<true>(); + } else { + ref->SetZombie<false>(forward_address); + ref->ClearReferent<false>(); } + cleared_references->EnqueueReference(ref); } } } -void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback, - void* arg) { +void ReferenceQueue::ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback, + void* arg) { if (UNLIKELY(IsEmpty())) { return; } mirror::Reference* const head = list_; mirror::Reference* ref = head; do { - mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>(); - if (referent != nullptr) { - mirror::Object* forward_address = preserve_callback(referent, arg); - if (forward_address != nullptr && forward_address != referent) { - ref->SetReferent<false>(forward_address); - } + mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr(); + if (referent_addr->AsMirrorPtr() != nullptr) { + UNUSED(preserve_callback(referent_addr, arg)); } ref = ref->GetPendingNext(); } while (LIKELY(ref != head)); diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h index 1d8cc1aefd..8ef0d20925 100644 --- a/runtime/gc/reference_queue.h +++ b/runtime/gc/reference_queue.h @@ -58,23 +58,22 @@ class ReferenceQueue { mirror::Reference* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Enqueues finalizer references with white referents. White referents are blackened, moved to the // zombie field, and the referent field is cleared. - void EnqueueFinalizerReferences(ReferenceQueue& cleared_references, - IsMarkedCallback* is_marked_callback, + void EnqueueFinalizerReferences(ReferenceQueue* cleared_references, + IsHeapReferenceMarkedCallback* is_marked_callback, MarkObjectCallback* mark_object_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Walks the reference list marking any references subject to the reference clearing policy. // References with a black referent are removed from the list. References with white referents // biased toward saving are blackened and also removed from the list. - void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg) + void ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Unlink the reference list clearing references objects with white referents. Cleared references // registered to a reference queue are scheduled for appending by the heap worker thread. 
- void ClearWhiteReferences(ReferenceQueue& cleared_references, - IsMarkedCallback* is_marked_callback, - void* arg) + void ClearWhiteReferences(ReferenceQueue* cleared_references, + IsHeapReferenceMarkedCallback* is_marked_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void Dump(std::ostream& os) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsEmpty() const { return list_ == nullptr; } diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h index 790f4d0c17..b787233b37 100644 --- a/runtime/indirect_reference_table-inl.h +++ b/runtime/indirect_reference_table-inl.h @@ -80,7 +80,7 @@ inline mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const { mirror::Object* obj = *root; if (LIKELY(obj != kClearedJniWeakGlobal)) { // The read barrier or VerifyObject won't handle kClearedJniWeakGlobal. - obj = ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(root); + obj = ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(root); VerifyObject(obj); } return obj; diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 756ac9606e..98e1d21d93 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -280,7 +280,7 @@ void IndirectReferenceTable::Dump(std::ostream& os) const { // We need a read barrier if weak globals. Since this is for // debugging where performance isn't top priority, we // unconditionally enable the read barrier, which is conservative. - obj = ReadBarrier::BarrierForWeakRoot<mirror::Object, kWithReadBarrier>(root); + obj = ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(root); entries.push_back(obj); } } diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index f12043e944..14305006e4 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -38,6 +38,16 @@ size_t InternTable::Size() const { return strong_interns_.size() + weak_interns_.size(); } +size_t InternTable::StrongSize() const { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return strong_interns_.size(); +} + +size_t InternTable::WeakSize() const { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return weak_interns_.size(); +} + void InternTable::DumpForSigQuit(std::ostream& os) const { MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); os << "Intern table: " << strong_interns_.size() << " strong; " @@ -83,24 +93,21 @@ void InternTable::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags f } mirror::String* InternTable::LookupStrong(mirror::String* s, int32_t hash_code) { - return Lookup<kWithoutReadBarrier>(&strong_interns_, s, hash_code); + return Lookup(&strong_interns_, s, hash_code); } mirror::String* InternTable::LookupWeak(mirror::String* s, int32_t hash_code) { // Weak interns need a read barrier because they are weak roots. 
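Aside on the intern_table.cc hunk above: StrongSize() and WeakSize() are plain locked size queries, mirroring what DumpForSigQuit already reports. A hedged usage sketch (hypothetical helper, not part of the patch):

    // Hypothetical diagnostic helper showing how the new accessors might be consumed.
    void DumpInternTableSizes(const InternTable& table, std::ostream& os) {
      os << "Intern table: " << table.StrongSize() << " strong; "
         << table.WeakSize() << " weak\n";
    }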
- return Lookup<kWithReadBarrier>(&weak_interns_, s, hash_code); + return Lookup(&weak_interns_, s, hash_code); } -template<ReadBarrierOption kReadBarrierOption> mirror::String* InternTable::Lookup(Table* table, mirror::String* s, int32_t hash_code) { - CHECK_EQ(table == &weak_interns_, kReadBarrierOption == kWithReadBarrier) - << "Only weak_interns_ needs a read barrier."; Locks::intern_table_lock_->AssertHeld(Thread::Current()); for (auto it = table->lower_bound(hash_code), end = table->end(); it != end && it->first == hash_code; ++it) { - mirror::String** weak_root = &it->second; - mirror::String* existing_string = - ReadBarrier::BarrierForWeakRoot<mirror::String, kReadBarrierOption>(weak_root); + mirror::String* existing_string; + mirror::String** root = &it->second; + existing_string = ReadBarrier::BarrierForRoot<mirror::String, kWithReadBarrier>(root); if (existing_string->Equals(s)) { return existing_string; } @@ -130,7 +137,7 @@ mirror::String* InternTable::InsertWeak(mirror::String* s, int32_t hash_code) { } void InternTable::RemoveStrong(mirror::String* s, int32_t hash_code) { - Remove<kWithoutReadBarrier>(&strong_interns_, s, hash_code); + Remove(&strong_interns_, s, hash_code); } void InternTable::RemoveWeak(mirror::String* s, int32_t hash_code) { @@ -138,18 +145,15 @@ void InternTable::RemoveWeak(mirror::String* s, int32_t hash_code) { if (runtime->IsActiveTransaction()) { runtime->RecordWeakStringRemoval(s, hash_code); } - Remove<kWithReadBarrier>(&weak_interns_, s, hash_code); + Remove(&weak_interns_, s, hash_code); } -template<ReadBarrierOption kReadBarrierOption> void InternTable::Remove(Table* table, mirror::String* s, int32_t hash_code) { - CHECK_EQ(table == &weak_interns_, kReadBarrierOption == kWithReadBarrier) - << "Only weak_interns_ needs a read barrier."; for (auto it = table->lower_bound(hash_code), end = table->end(); it != end && it->first == hash_code; ++it) { - mirror::String** weak_root = &it->second; - mirror::String* existing_string = - ReadBarrier::BarrierForWeakRoot<mirror::String, kReadBarrierOption>(weak_root); + mirror::String* existing_string; + mirror::String** root = &it->second; + existing_string = ReadBarrier::BarrierForRoot<mirror::String, kWithReadBarrier>(root); if (existing_string == s) { table->erase(it); return; diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 3df2aebb47..6dc7f7b606 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -64,6 +64,8 @@ class InternTable { bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t Size() const; + size_t StrongSize() const; + size_t WeakSize() const; void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags); @@ -83,7 +85,6 @@ class InternTable { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::String* LookupWeak(mirror::String* s, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> mirror::String* Lookup(Table* table, mirror::String* s, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::String* InsertStrong(mirror::String* s, int32_t hash_code) @@ -96,7 +97,6 @@ class InternTable { void RemoveWeak(mirror::String* s, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); - template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> void Remove(Table* table, mirror::String* s, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) 
EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); @@ -117,12 +117,16 @@ class InternTable { bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_); bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_); ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_); + // Since this contains (strong) roots, they need a read barrier to + // enable concurrent intern table (strong) root scan. Do not + // directly access the strings in it. Use functions that contain + // read barriers. Table strong_interns_ GUARDED_BY(Locks::intern_table_lock_); std::vector<std::pair<int32_t, mirror::String*>> new_strong_intern_roots_ GUARDED_BY(Locks::intern_table_lock_); - // Since weak_interns_ contain weak roots, they need a read - // barrier. Do not directly access the strings in it. Use functions - // that contain read barriers. + // Since this contains (weak) roots, they need a read barrier. Do + // not directly access the strings in it. Use functions that contain + // read barriers. Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_); }; diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 66406bfa73..fc5d5905cb 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -135,6 +135,8 @@ static jmethodID FindMethodID(ScopedObjectAccess& soa, jclass jni_class, mirror::ArtMethod* method = nullptr; if (is_static) { method = c->FindDirectMethod(name, sig); + } else if (c->IsInterface()) { + method = c->FindInterfaceMethod(name, sig); } else { method = c->FindVirtualMethod(name, sig); if (method == nullptr) { diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index f182e950bd..5e46c57de1 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -268,32 +268,38 @@ TEST_F(JniInternalTest, GetMethodID) { jclass jlobject = env_->FindClass("java/lang/Object"); jclass jlstring = env_->FindClass("java/lang/String"); jclass jlnsme = env_->FindClass("java/lang/NoSuchMethodError"); + jclass jncrbc = env_->FindClass("java/nio/channels/ReadableByteChannel"); - // Sanity check that no exceptions are pending + // Sanity check that no exceptions are pending. ASSERT_FALSE(env_->ExceptionCheck()); // Check that java.lang.Object.foo() doesn't exist and NoSuchMethodError is - // a pending exception + // a pending exception. jmethodID method = env_->GetMethodID(jlobject, "foo", "()V"); EXPECT_EQ(nullptr, method); ExpectException(jlnsme); - // Check that java.lang.Object.equals() does exist + // Check that java.lang.Object.equals() does exist. method = env_->GetMethodID(jlobject, "equals", "(Ljava/lang/Object;)Z"); EXPECT_NE(nullptr, method); EXPECT_FALSE(env_->ExceptionCheck()); // Check that GetMethodID for java.lang.String.valueOf(int) fails as the - // method is static + // method is static. method = env_->GetMethodID(jlstring, "valueOf", "(I)Ljava/lang/String;"); EXPECT_EQ(nullptr, method); ExpectException(jlnsme); - // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor + // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor. method = env_->GetMethodID(jlnsme, "<init>", "(Ljava/lang/String;)V"); EXPECT_NE(nullptr, method); EXPECT_FALSE(env_->ExceptionCheck()); + // Check that GetMethodID can find a interface method inherited from another interface. + method = env_->GetMethodID(jncrbc, "close", "()V"); + EXPECT_NE(nullptr, method); + EXPECT_FALSE(env_->ExceptionCheck()); + // Bad arguments. 
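The FindMethodID change above makes GetMethodID resolve methods that an interface only inherits from a super-interface, which is what the new ReadableByteChannel test exercises. A hedged JNI-side sketch of the newly working lookup (hypothetical native caller, mirroring the added test):

    #include <jni.h>

    // ReadableByteChannel declares no close() of its own; it inherits it from
    // java.nio.channels.Channel / java.io.Closeable. With the IsInterface() branch in
    // FindMethodID this lookup now succeeds instead of raising NoSuchMethodError.
    jmethodID FindChannelClose(JNIEnv* env) {
      jclass channel_class = env->FindClass("java/nio/channels/ReadableByteChannel");
      if (channel_class == nullptr) {
        return nullptr;  // ClassNotFoundException is already pending.
      }
      return env->GetMethodID(channel_class, "close", "()V");
    }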
CheckJniAbortCatcher check_jni_abort_catcher; method = env_->GetMethodID(nullptr, "<init>", "(Ljava/lang/String;)V"); @@ -630,11 +636,13 @@ TEST_F(JniInternalTest, GetPrimitiveArrayElementsOfWrongType) { jni_abort_catcher.Check( "attempt to get double primitive array elements with an object of type boolean[]"); jbyteArray array2 = env_->NewByteArray(10); - EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy), nullptr); + EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy), + nullptr); jni_abort_catcher.Check( "attempt to get boolean primitive array elements with an object of type byte[]"); jobject object = env_->NewStringUTF("Test String"); - EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy), nullptr); + EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy), + nullptr); jni_abort_catcher.Check( "attempt to get boolean primitive array elements with an object of type java.lang.String"); } @@ -681,7 +689,8 @@ TEST_F(JniInternalTest, ReleasePrimitiveArrayElementsOfWrongType) { jobject object = env_->NewStringUTF("Test String"); env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), elements, 0); jni_abort_catcher.Check( - "attempt to release boolean primitive array elements with an object of type java.lang.String"); + "attempt to release boolean primitive array elements with an object of type " + "java.lang.String"); } TEST_F(JniInternalTest, GetReleasePrimitiveArrayCriticalOfWrongType) { CheckJniAbortCatcher jni_abort_catcher; @@ -736,7 +745,8 @@ TEST_F(JniInternalTest, GetPrimitiveArrayRegionElementsOfWrongType) { env_->GetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength, reinterpret_cast<jboolean*>(elements)); jni_abort_catcher.Check( - "attempt to get region of boolean primitive array elements with an object of type java.lang.String"); + "attempt to get region of boolean primitive array elements with an object of type " + "java.lang.String"); } TEST_F(JniInternalTest, SetPrimitiveArrayRegionElementsOfWrongType) { @@ -782,7 +792,8 @@ TEST_F(JniInternalTest, SetPrimitiveArrayRegionElementsOfWrongType) { env_->SetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength, reinterpret_cast<jboolean*>(elements)); jni_abort_catcher.Check( - "attempt to set region of boolean primitive array elements with an object of type java.lang.String"); + "attempt to set region of boolean primitive array elements with an object of type " + "java.lang.String"); } TEST_F(JniInternalTest, NewObjectArray) { diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc index a20f7b941e..c7981804df 100644 --- a/runtime/mirror/class.cc +++ b/runtime/mirror/class.cc @@ -37,17 +37,17 @@ namespace art { namespace mirror { -Class* Class::java_lang_Class_ = NULL; +Class* Class::java_lang_Class_ = nullptr; void Class::SetClassClass(Class* java_lang_Class) { - CHECK(java_lang_Class_ == NULL) << java_lang_Class_ << " " << java_lang_Class; - CHECK(java_lang_Class != NULL); + CHECK(java_lang_Class_ == nullptr) << java_lang_Class_ << " " << java_lang_Class; + CHECK(java_lang_Class != nullptr); java_lang_Class_ = java_lang_Class; } void Class::ResetClass() { - CHECK(java_lang_Class_ != NULL); - java_lang_Class_ = NULL; + CHECK(java_lang_Class_ != nullptr); + java_lang_Class_ = nullptr; } void Class::VisitRoots(RootCallback* callback, void* arg) { @@ -146,7 +146,7 @@ String* Class::ComputeName(Handle<Class> h_this) { if 
((descriptor[0] != 'L') && (descriptor[0] != '[')) { // The descriptor indicates that this is the class for // a primitive type; special-case the return value. - const char* c_name = NULL; + const char* c_name = nullptr; switch (descriptor[0]) { case 'Z': c_name = "boolean"; break; case 'B': c_name = "byte"; break; @@ -196,10 +196,10 @@ void Class::DumpClass(std::ostream& os, int flags) { os << "----- " << (IsInterface() ? "interface" : "class") << " " << "'" << GetDescriptor() << "' cl=" << GetClassLoader() << " -----\n", os << " objectSize=" << SizeOf() << " " - << "(" << (h_super.Get() != NULL ? h_super->SizeOf() : -1) << " from super)\n", + << "(" << (h_super.Get() != nullptr ? h_super->SizeOf() : -1) << " from super)\n", os << StringPrintf(" access=0x%04x.%04x\n", GetAccessFlags() >> 16, GetAccessFlags() & kAccJavaFlagsMask); - if (h_super.Get() != NULL) { + if (h_super.Get() != nullptr) { os << " super='" << PrettyClass(h_super.Get()) << "' (cl=" << h_super->GetClassLoader() << ")\n"; } @@ -217,7 +217,7 @@ void Class::DumpClass(std::ostream& os, int flags) { } // After this point, this may have moved due to GetDirectInterface. os << " vtable (" << h_this->NumVirtualMethods() << " entries, " - << (h_super.Get() != NULL ? h_super->NumVirtualMethods() : 0) << " in super):\n"; + << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n"; for (size_t i = 0; i < NumVirtualMethods(); ++i) { os << StringPrintf(" %2zd: %s\n", i, PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str()); @@ -253,7 +253,7 @@ void Class::SetReferenceInstanceOffsets(uint32_t new_reference_offsets) { // Sanity check that the number of bits set in the reference offset bitmap // agrees with the number of references size_t count = 0; - for (Class* c = this; c != NULL; c = c->GetSuperClass()) { + for (Class* c = this; c != nullptr; c = c->GetSuperClass()) { count += c->NumReferenceInstanceFieldsDuringLinking(); } CHECK_EQ((size_t)POPCOUNT(new_reference_offsets), count); @@ -329,40 +329,58 @@ void Class::SetClassLoader(ClassLoader* new_class_loader) { } } +ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) { + // Check the current class before checking the interfaces. + ArtMethod* method = FindDeclaredVirtualMethod(name, signature); + if (method != nullptr) { + return method; + } + + int32_t iftable_count = GetIfTableCount(); + IfTable* iftable = GetIfTable(); + for (int32_t i = 0; i < iftable_count; ++i) { + method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature); + if (method != nullptr) { + return method; + } + } + return nullptr; +} + ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) { // Check the current class before checking the interfaces. ArtMethod* method = FindDeclaredVirtualMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } int32_t iftable_count = GetIfTableCount(); IfTable* iftable = GetIfTable(); - for (int32_t i = 0; i < iftable_count; i++) { + for (int32_t i = 0; i < iftable_count; ++i) { method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) { // Check the current class before checking the interfaces. 
ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx); - if (method != NULL) { + if (method != nullptr) { return method; } int32_t iftable_count = GetIfTableCount(); IfTable* iftable = GetIfTable(); - for (int32_t i = 0; i < iftable_count; i++) { + for (int32_t i = 0; i < iftable_count; ++i) { method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(dex_cache, dex_method_idx); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) { @@ -372,7 +390,7 @@ ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const String return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) { @@ -382,7 +400,7 @@ ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signat return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) { @@ -394,37 +412,37 @@ ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t d } } } - return NULL; + return nullptr; } ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) { @@ -434,7 +452,7 @@ ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Strin return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) { @@ -444,7 +462,7 @@ ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signa return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) { @@ -456,37 +474,37 @@ ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t } } } - return NULL; + return nullptr; } ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = 
klass->FindDeclaredVirtualMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) { - for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) { + for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) { ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx); - if (method != NULL) { + if (method != nullptr) { return method; } } - return NULL; + return nullptr; } ArtMethod* Class::FindClassInitializer() { @@ -498,7 +516,7 @@ ArtMethod* Class::FindClassInitializer() { return method; } } - return NULL; + return nullptr; } ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) { @@ -510,7 +528,7 @@ ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const String return f; } } - return NULL; + return nullptr; } ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) { @@ -522,42 +540,42 @@ ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t d } } } - return NULL; + return nullptr; } ArtField* Class::FindInstanceField(const StringPiece& name, const StringPiece& type) { // Is the field in this class, or any of its superclasses? // Interfaces are not relevant because they can't contain instance fields. - for (Class* c = this; c != NULL; c = c->GetSuperClass()) { + for (Class* c = this; c != nullptr; c = c->GetSuperClass()) { ArtField* f = c->FindDeclaredInstanceField(name, type); - if (f != NULL) { + if (f != nullptr) { return f; } } - return NULL; + return nullptr; } ArtField* Class::FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) { // Is the field in this class, or any of its superclasses? // Interfaces are not relevant because they can't contain instance fields. - for (Class* c = this; c != NULL; c = c->GetSuperClass()) { + for (Class* c = this; c != nullptr; c = c->GetSuperClass()) { ArtField* f = c->FindDeclaredInstanceField(dex_cache, dex_field_idx); - if (f != NULL) { + if (f != nullptr) { return f; } } - return NULL; + return nullptr; } ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) { - DCHECK(type != NULL); + DCHECK(type != nullptr); for (size_t i = 0; i < NumStaticFields(); ++i) { ArtField* f = GetStaticField(i); if (name == f->GetName() && type == f->GetTypeDescriptor()) { return f; } } - return NULL; + return nullptr; } ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) { @@ -569,7 +587,7 @@ ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex } } } - return NULL; + return nullptr; } ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name, @@ -603,7 +621,7 @@ ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCac for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) { // Is the field in this class? 
ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx); - if (f != NULL) { + if (f != nullptr) { return f; } // Wrap k incase it moves during GetDirectInterface. @@ -625,7 +643,7 @@ ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCac ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& name, const StringPiece& type) { // Find a field using the JLS field resolution order - for (Class* k = klass.Get(); k != NULL; k = k->GetSuperClass()) { + for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) { // Is the field in this class? ArtField* f = k->FindDeclaredInstanceField(name, type); if (f != nullptr) { @@ -652,10 +670,10 @@ ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& static void SetPreverifiedFlagOnMethods(mirror::ObjectArray<mirror::ArtMethod>* methods) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (methods != NULL) { + if (methods != nullptr) { for (int32_t index = 0, end = methods->GetLength(); index < end; ++index) { mirror::ArtMethod* method = methods->GetWithoutChecks(index); - DCHECK(method != NULL); + DCHECK(method != nullptr); if (!method->IsNative() && !method->IsAbstract()) { method->SetPreverified(); } diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 90381a7dec..c83f411802 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -648,6 +648,9 @@ class MANAGED Class : public Object { ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h index 0b6e759097..9c9d87be01 100644 --- a/runtime/mirror/reference.h +++ b/runtime/mirror/reference.h @@ -21,6 +21,13 @@ namespace art { +namespace gc { + +class ReferenceProcessor; +class ReferenceQueue; + +} // namespace gc + struct ReferenceOffsets; struct FinalizerReferenceOffsets; @@ -41,7 +48,6 @@ class MANAGED Reference : public Object { static MemberOffset ReferentOffset() { return OFFSET_OF_OBJECT_MEMBER(Reference, referent_); } - template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> Object* GetReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return GetFieldObjectVolatile<Object, kDefaultVerifyFlags, kReadBarrierOption>( @@ -55,7 +61,6 @@ class MANAGED Reference : public Object { void ClearReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr); } - // Volatile read/write is not necessary since the java pending next is only accessed from // the java threads for cleared references. Once these cleared references have a null referent, // we never end up reading their pending next from the GC again. @@ -76,6 +81,11 @@ class MANAGED Reference : public Object { bool IsEnqueuable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: + // Note: This avoids a read barrier, it should only be used by the GC. + HeapReference<Object>* GetReferentReferenceAddr() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return GetFieldObjectReferenceAddr<kDefaultVerifyFlags>(ReferentOffset()); + } + // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses". 
HeapReference<Reference> pending_next_; // Note this is Java volatile: HeapReference<Object> queue_; // Note this is Java volatile: @@ -83,6 +93,8 @@ class MANAGED Reference : public Object { HeapReference<Object> referent_; // Note this is Java volatile: friend struct art::ReferenceOffsets; // for verifying offset information + friend class gc::ReferenceProcessor; + friend class gc::ReferenceQueue; DISALLOW_IMPLICIT_CONSTRUCTORS(Reference); }; diff --git a/runtime/monitor.h b/runtime/monitor.h index bd0e23cafe..a28823d184 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -95,7 +95,7 @@ class Monitor { template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> mirror::Object* GetObject() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(&obj_); + return ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&obj_); } void SetObject(mirror::Object* object); diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h index dd8ce16f74..d8c1c402b9 100644 --- a/runtime/object_callbacks.h +++ b/runtime/object_callbacks.h @@ -70,6 +70,11 @@ typedef void (DelayReferenceReferentCallback)(mirror::Class* klass, mirror::Refe // address the object (if the object didn't move, returns the object input parameter). typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg) __attribute__((warn_unused_result)); + +// Returns true if the object in the heap reference is marked, if it is marked and has moved the +// callback updates the heap reference contain the new value. +typedef bool (IsHeapReferenceMarkedCallback)(mirror::HeapReference<mirror::Object>* object, + void* arg) __attribute__((warn_unused_result)); typedef void (ProcessMarkStackCallback)(void* arg); } // namespace art diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index cff5ec383f..87106d6003 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -197,7 +197,7 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni #endif // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after // parsing options. - background_collector_type_ = gc::kCollectorTypeNone; + background_collector_type_ = gc::kCollectorTypeSS; stack_size_ = 0; // 0 means default. 
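The new IsHeapReferenceMarkedCallback typedef above differs from IsMarkedCallback in two ways: it reports liveness as a bool, and it is responsible for fixing up the HeapReference slot itself when the referent has moved. A hedged sketch of that relationship (hypothetical adapter, not part of the patch):

    // Hypothetical adapter, for illustration only: wraps an old-style IsMarkedCallback
    // (returns the forwarded pointer or nullptr) in the new contract (returns liveness
    // and updates the slot in place).
    static bool AdaptIsMarked(mirror::HeapReference<mirror::Object>* ref,
                              IsMarkedCallback* is_marked, void* arg) {
      mirror::Object* old_obj = ref->AsMirrorPtr();
      mirror::Object* new_obj = is_marked(old_obj, arg);
      if (new_obj == nullptr) {
        return false;  // Referent is not marked.
      }
      if (new_obj != old_obj) {
        ref->Assign(new_obj);  // Object moved; update the reference in place.
      }
      return true;
    }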
max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation; low_memory_mode_ = false; @@ -556,11 +556,11 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni } else if (option == "-Xprofile-start-immediately") { profiler_options_.start_immediately_ = true; } else if (StartsWith(option, "-Xprofile-top-k-threshold:")) { - if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_threshold_)) { + if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_threshold_)) { return false; } } else if (StartsWith(option, "-Xprofile-top-k-change-threshold:")) { - if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_change_threshold_)) { + if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_change_threshold_)) { return false; } } else if (StartsWith(option, "-implicit-checks:")) { diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index e252b7bb83..fd43d78835 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -44,8 +44,8 @@ inline MirrorType* ReadBarrier::Barrier( } template <typename MirrorType, ReadBarrierOption kReadBarrierOption> -inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType** weak_root) { - MirrorType* ref = *weak_root; +inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) { + MirrorType* ref = *root; const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; if (with_read_barrier && kUseBakerReadBarrier) { // To be implemented. diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index 7232a3febe..451d13c5db 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -39,7 +39,7 @@ class ReadBarrier { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier> - ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType** weak_root) + ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); }; diff --git a/runtime/stack.cc b/runtime/stack.cc index 7e922c59f0..132ac3e795 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -142,7 +142,8 @@ size_t StackVisitor::GetNativePcOffset() const { return GetMethod()->NativePcOffset(cur_quick_frame_pc_); } -uint32_t StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const { +bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, + uint32_t* val) const { if (cur_quick_frame_ != NULL) { DCHECK(context_ != NULL); // You can't reliably read registers without a context. DCHECK(m == GetMethod()); @@ -155,19 +156,30 @@ uint32_t StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kin if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) { bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg); uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask(); - return GetGPR(vmap_table.ComputeRegister(spill_mask, vmap_offset, kind)); + uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind); + uintptr_t ptr_val; + bool success = false; + if (is_float) { + success = GetFPR(reg, &ptr_val); + } else { + success = GetGPR(reg, &ptr_val); + } + *val = ptr_val; + return success; } else { const DexFile::CodeItem* code_item = m->GetCodeItem(); DCHECK(code_item != NULL) << PrettyMethod(m); // Can't be NULL or how would we compile its instructions? 
- return *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(), + *val = *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(), frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg); + return true; } } else { - return cur_shadow_frame_->GetVReg(vreg); + *val = cur_shadow_frame_->GetVReg(vreg); + return true; } } -void StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, +bool StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind) { if (cur_quick_frame_ != NULL) { DCHECK(context_ != NULL); // You can't reliably write registers without a context. @@ -181,8 +193,12 @@ void StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_val if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) { bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg); uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask(); - const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kReferenceVReg); - SetGPR(reg, new_value); + const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind); + if (is_float) { + return SetFPR(reg, new_value); + } else { + return SetGPR(reg, new_value); + } } else { const DexFile::CodeItem* code_item = m->GetCodeItem(); DCHECK(code_item != NULL) << PrettyMethod(m); // Can't be NULL or how would we compile its instructions? @@ -190,9 +206,11 @@ void StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_val frame_info.FrameSizeInBytes(), vreg, kRuntimeISA); byte* vreg_addr = reinterpret_cast<byte*>(GetCurrentQuickFrame()) + offset; *reinterpret_cast<uint32_t*>(vreg_addr) = new_value; + return true; } } else { - return cur_shadow_frame_->SetVReg(vreg, new_value); + cur_shadow_frame_->SetVReg(vreg, new_value); + return true; } } @@ -201,14 +219,24 @@ uintptr_t* StackVisitor::GetGPRAddress(uint32_t reg) const { return context_->GetGPRAddress(reg); } -uintptr_t StackVisitor::GetGPR(uint32_t reg) const { +bool StackVisitor::GetGPR(uint32_t reg, uintptr_t* val) const { + DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine"; + return context_->GetGPR(reg, val); +} + +bool StackVisitor::SetGPR(uint32_t reg, uintptr_t value) { + DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine"; + return context_->SetGPR(reg, value); +} + +bool StackVisitor::GetFPR(uint32_t reg, uintptr_t* val) const { DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine"; - return context_->GetGPR(reg); + return context_->GetFPR(reg, val); } -void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) { +bool StackVisitor::SetFPR(uint32_t reg, uintptr_t value) { DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine"; - context_->SetGPR(reg, value); + return context_->SetFPR(reg, value); } uintptr_t StackVisitor::GetReturnPc() const { diff --git a/runtime/stack.h b/runtime/stack.h index 199111563f..9402cddf56 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -561,15 +561,21 @@ class StackVisitor { bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - uint32_t GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const + bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind) + uint32_t 
GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t val; + bool success = GetVReg(m, vreg, kind, &val); + CHECK(success) << "Failed to read vreg " << vreg << " of kind " << kind; + return val; + } + + bool SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); uintptr_t* GetGPRAddress(uint32_t reg) const; - uintptr_t GetGPR(uint32_t reg) const; - void SetGPR(uint32_t reg, uintptr_t value); // This is a fast-path for getting/setting values in a quick frame. uint32_t* GetVRegAddr(StackReference<mirror::ArtMethod>* cur_quick_frame, @@ -700,6 +706,11 @@ class StackVisitor { StackVisitor(Thread* thread, Context* context, size_t num_frames) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool GetGPR(uint32_t reg, uintptr_t* val) const; + bool SetGPR(uint32_t reg, uintptr_t value); + bool GetFPR(uint32_t reg, uintptr_t* val) const; + bool SetFPR(uint32_t reg, uintptr_t value); + instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(uint32_t depth) const; void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 1d041514ed..c9c3bbabdf 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -274,7 +274,7 @@ MethodVerifier::FailureKind MethodVerifier::VerifyMethod(uint32_t method_idx, result = kHardFailure; } uint64_t duration_ns = NanoTime() - start_ns; - if (duration_ns > MsToNs(100)) { + if (duration_ns > MsToNs(100) && !kIsDebugBuild) { LOG(WARNING) << "Verification of " << PrettyMethod(method_idx, *dex_file) << " took " << PrettyDuration(duration_ns); } @@ -3006,59 +3006,11 @@ mirror::ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(uint32_t dex_meth return res_method; } -mirror::ArtMethod* MethodVerifier::VerifyInvocationArgs(const Instruction* inst, - MethodType method_type, - bool is_range, - bool is_super) { - // Resolve the method. This could be an abstract or concrete method depending on what sort of call - // we're making. - const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); - - // As the method may not have been resolved, make this static check against what we expect. - const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx); - uint32_t shorty_idx = dex_file_->GetProtoId(method_id.proto_idx_).shorty_idx_; - uint32_t shorty_len; - const char* descriptor = dex_file_->StringDataAndUtf16LengthByIdx(shorty_idx, &shorty_len); - int32_t sig_registers = method_type == METHOD_STATIC ? 0 : 1; - for (size_t i = 1; i < shorty_len; i++) { - if (descriptor[i] == 'J' || descriptor[i] == 'D') { - sig_registers += 2; - } else { - sig_registers++; - } - } - if (inst->VRegA() != sig_registers) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() << - " arguments, found " << sig_registers; - return nullptr; - } - - mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type); - if (res_method == NULL) { // error or class is unresolved - return NULL; - } - - // If we're using invoke-super(method), make sure that the executing method's class' superclass - // has a vtable entry for the target method. 
- if (is_super) { - DCHECK(method_type == METHOD_VIRTUAL); - const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_); - if (super.IsUnresolvedTypes()) { - Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from " - << PrettyMethod(dex_method_idx_, *dex_file_) - << " to super " << PrettyMethod(res_method); - return NULL; - } - mirror::Class* super_klass = super.GetClass(); - if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) { - Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from " - << PrettyMethod(dex_method_idx_, *dex_file_) - << " to super " << super - << "." << res_method->GetName() - << res_method->GetSignature(); - return NULL; - } - } +template <class T> +mirror::ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator(T* it, const Instruction* inst, + MethodType method_type, + bool is_range, + mirror::ArtMethod* res_method) { // We use vAA as our expected arg count, rather than res_method->insSize, because we need to // match the call to the signature. Also, we might be calling through an abstract method // definition (which doesn't have register count values). @@ -3068,83 +3020,193 @@ mirror::ArtMethod* MethodVerifier::VerifyInvocationArgs(const Instruction* inst, if (expected_args > code_item_->outs_size_) { Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args << ") exceeds outsSize (" << code_item_->outs_size_ << ")"; - return NULL; + return nullptr; } + uint32_t arg[5]; + if (!is_range) { + inst->GetVarArgs(arg); + } + uint32_t sig_registers = 0; + /* * Check the "this" argument, which must be an instance of the class that declared the method. * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a * rigorous check here (which is okay since we have to do it at runtime). */ - size_t actual_args = 0; - if (!res_method->IsStatic()) { + if (method_type != METHOD_STATIC) { const RegType& actual_arg_type = work_line_->GetInvocationThis(inst, is_range); if (actual_arg_type.IsConflict()) { // GetInvocationThis failed. - return NULL; - } - if (actual_arg_type.IsUninitializedReference() && !res_method->IsConstructor()) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized"; - return NULL; + CHECK(have_pending_hard_failure_); + return nullptr; + } + if (actual_arg_type.IsUninitializedReference()) { + if (res_method) { + if (!res_method->IsConstructor()) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized"; + return nullptr; + } + } else { + // Check whether the name of the called method is "<init>" + const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); + if (strcmp(dex_file_->GetMethodName(dex_file_->GetMethodId(method_idx)), "<init>") != 0) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized"; + return nullptr; + } + } } if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) { - mirror::Class* klass = res_method->GetDeclaringClass(); - const RegType& res_method_class = - reg_types_.FromClass(klass->GetDescriptor().c_str(), klass, - klass->CannotBeAssignedFromOtherTypes()); - if (!res_method_class.IsAssignableFrom(actual_arg_type)) { + const RegType* res_method_class; + if (res_method != nullptr) { + mirror::Class* klass = res_method->GetDeclaringClass(); + res_method_class = &reg_types_.FromClass(klass->GetDescriptor().c_str(), klass, + klass->CannotBeAssignedFromOtherTypes()); + } else { + const uint32_t method_idx = (is_range) ?
inst->VRegB_3rc() : inst->VRegB_35c(); + const uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_; + res_method_class = &reg_types_.FromDescriptor(class_loader_->Get(), + dex_file_->StringByTypeIdx(class_idx), + false); + } + if (!res_method_class->IsAssignableFrom(actual_arg_type)) { Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS: VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type - << "' not instance of '" << res_method_class << "'"; - return NULL; + << "' not instance of '" << *res_method_class << "'"; + // Continue on soft failures. We need to find possible hard failures to avoid problems in + // the compiler. + if (have_pending_hard_failure_) { + return nullptr; + } } } - actual_args++; - } - /* - * Process the target method's signature. This signature may or may not - * have been verified, so we can't assume it's properly formed. - */ - const DexFile::TypeList* params = res_method->GetParameterTypeList(); - size_t params_size = params == NULL ? 0 : params->Size(); - uint32_t arg[5]; - if (!is_range) { - inst->GetVarArgs(arg); + sig_registers = 1; } - for (size_t param_index = 0; param_index < params_size; param_index++) { - if (actual_args >= expected_args) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invalid call to '" << PrettyMethod(res_method) - << "'. Expected " << expected_args << " arguments, processing argument " << actual_args - << " (where longs/doubles count twice)."; - return NULL; + + for ( ; it->HasNext(); it->Next()) { + if (sig_registers >= expected_args) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() << + " arguments, found " << sig_registers << " or more."; + return nullptr; } - const char* descriptor = - res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_); - if (descriptor == NULL) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method) - << " missing signature component"; - return NULL; + + const char* param_descriptor = it->GetDescriptor(); + + if (param_descriptor == nullptr) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation because of missing signature " + "component"; + return nullptr; } - const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false); - uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args]; + + const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), param_descriptor, + false); + uint32_t get_reg = is_range ? inst->VRegC_3rc() + static_cast<uint32_t>(sig_registers) : + arg[sig_registers]; if (reg_type.IsIntegralTypes()) { const RegType& src_type = work_line_->GetRegisterType(get_reg); if (!src_type.IsIntegralTypes()) { Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register v" << get_reg << " has type " << src_type - << " but expected " << reg_type; + << " but expected " << reg_type; return res_method; } } else if (!work_line_->VerifyRegisterType(get_reg, reg_type)) { - return res_method; + // Continue on soft failures. We need to find possible hard failures to avoid problems in the + // compiler. + if (have_pending_hard_failure_) { + return res_method; + } } - actual_args = reg_type.IsLongOrDoubleTypes() ? actual_args + 2 : actual_args + 1; + sig_registers += reg_type.IsLongOrDoubleTypes() ?
2 : 1; } - if (actual_args != expected_args) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method) - << " expected " << expected_args << " arguments, found " << actual_args; - return NULL; - } else { - return res_method; + if (expected_args != sig_registers) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << expected_args << + " arguments, found " << sig_registers; + return nullptr; + } + return res_method; +} + +void MethodVerifier::VerifyInvocationArgsUnresolvedMethod(const Instruction* inst, + MethodType method_type, + bool is_range) { + // As the method may not have been resolved, make this static check against what we expect. + // The main reason for this code block is to fail hard when we find an illegal use, e.g., + // wrong number of arguments or wrong primitive types, even if the method could not be resolved. + const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); + DexFileParameterIterator it(*dex_file_, + dex_file_->GetProtoId(dex_file_->GetMethodId(method_idx).proto_idx_)); + VerifyInvocationArgsFromIterator<DexFileParameterIterator>(&it, inst, method_type, is_range, + nullptr); +} + +class MethodParamListDescriptorIterator { + public: + explicit MethodParamListDescriptorIterator(mirror::ArtMethod* res_method) : + res_method_(res_method), pos_(0), params_(res_method->GetParameterTypeList()), + params_size_(params_ == nullptr ? 0 : params_->Size()) { + } + + bool HasNext() { + return pos_ < params_size_; + } + + void Next() { + ++pos_; + } + + const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return res_method_->GetTypeDescriptorFromTypeIdx(params_->GetTypeItem(pos_).type_idx_); } + + private: + mirror::ArtMethod* res_method_; + size_t pos_; + const DexFile::TypeList* params_; + const size_t params_size_; +}; + +mirror::ArtMethod* MethodVerifier::VerifyInvocationArgs(const Instruction* inst, + MethodType method_type, + bool is_range, + bool is_super) { + // Resolve the method. This could be an abstract or concrete method depending on what sort of call + // we're making. + const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); + + mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type); + if (res_method == NULL) { // error or class is unresolved + // Check what we can statically. + if (!have_pending_hard_failure_) { + VerifyInvocationArgsUnresolvedMethod(inst, method_type, is_range); + } + return nullptr; + } + + // If we're using invoke-super(method), make sure that the executing method's class' superclass + // has a vtable entry for the target method. + if (is_super) { + DCHECK(method_type == METHOD_VIRTUAL); + const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_); + if (super.IsUnresolvedTypes()) { + Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from " + << PrettyMethod(dex_method_idx_, *dex_file_) + << " to super " << PrettyMethod(res_method); + return nullptr; + } + mirror::Class* super_klass = super.GetClass(); + if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) { + Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from " + << PrettyMethod(dex_method_idx_, *dex_file_) + << " to super " << super + << "." << res_method->GetName() + << res_method->GetSignature(); + return nullptr; + } + } + + // Process the target method's signature.
This signature may or may not + MethodParamListDescriptorIterator it(res_method); + return VerifyInvocationArgsFromIterator<MethodParamListDescriptorIterator>(&it, inst, method_type, + is_range, res_method); } mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst, diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index 451c9e2fbe..b6d5b351c3 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -565,6 +565,18 @@ class MethodVerifier { bool is_range, bool is_super) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Similar checks to the above, but on the proto. Will be used when the method cannot be + // resolved. + void VerifyInvocationArgsUnresolvedMethod(const Instruction* inst, MethodType method_type, + bool is_range) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + template <class T> + mirror::ArtMethod* VerifyInvocationArgsFromIterator(T* it, const Instruction* inst, + MethodType method_type, bool is_range, + mirror::ArtMethod* res_method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + mirror::ArtMethod* GetQuickInvokedMethod(const Instruction* inst, RegisterLine* reg_line, bool is_range) diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar index 5d6d16aa93..f6729745aa 100755 --- a/test/etc/host-run-test-jar +++ b/test/etc/host-run-test-jar @@ -30,6 +30,9 @@ while true; do exit 1 fi LIB="$1" + if [ `uname` = "Darwin" ]; then + LIB=${LIB/%so/dylib} + fi shift elif [ "x$1" = "x--boot" ]; then shift @@ -110,10 +113,16 @@ if [ "$DEBUGGER" = "y" ]; then fi if [ "$GDB" = "y" ]; then - gdb=gdb - gdbargs="--args $exe" - # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line. - # gdbargs="--annotate=3 $gdbargs" + if [ `uname` = "Darwin" ]; then + gdb=lldb + gdbargs="-- $exe" + exe= + else + gdb=gdb + gdbargs="--args $exe" + # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line. + # gdbargs="--annotate=3 $gdbargs" + fi fi if [ "$INTERPRETER" = "y" ]; then |
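Note on the parsed_options hunk above: -Xprofile-top-k-threshold and -Xprofile-top-k-change-threshold are now accepted over the full 0.0 to 100.0 percentage range instead of 10.0 to 90.0. A standalone sketch of that kind of bounded option parse follows; it is a toy helper for illustration only, not the runtime's ParseDouble, and the function name is made up.

#include <cstdlib>
#include <iostream>
#include <string>

// Toy bounded-double option parse: take an option of the form
// "-Xprofile-top-k-threshold:<value>", extract the part after the
// separator and reject values outside [min, max].
bool ParseBoundedDouble(const std::string& option, char separator,
                        double min, double max, double* out) {
  size_t pos = option.find(separator);
  if (pos == std::string::npos) {
    return false;
  }
  char* end = nullptr;
  double value = strtod(option.c_str() + pos + 1, &end);
  if (end == option.c_str() + pos + 1 || *end != '\0') {
    return false;  // not a number
  }
  if (value < min || value > max) {
    return false;  // out of range
  }
  *out = value;
  return true;
}

int main() {
  double threshold = 0.0;
  // 95.0 would have been rejected under the old 10.0-90.0 bounds.
  bool ok = ParseBoundedDouble("-Xprofile-top-k-threshold:95.0", ':',
                               0.0, 100.0, &threshold);
  std::cout << (ok ? "ok " : "rejected ") << threshold << "\n";
  return 0;
}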
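Note on the read_barrier hunks: BarrierForWeakRoot is generalized to BarrierForRoot, keeping the ReadBarrierOption template parameter so each call site decides at compile time whether a barrier is applied. The following standalone sketch shows that compile-time dispatch pattern; the types and the ApplyBarrier function are invented for illustration and are not ART code.

#include <iostream>

enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

struct Object { int value; };

// Hypothetical stand-in for a barrier: a real collector would mark or
// forward the reference before handing it back to the caller.
static Object* ApplyBarrier(Object* ref) {
  std::cout << "barrier applied\n";
  return ref;
}

// The option is a template parameter, so the branch below is resolved at
// compile time and costs nothing when barriers are disabled.
template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
Object* BarrierForRoot(Object** root) {
  Object* ref = *root;
  if (kReadBarrierOption == kWithReadBarrier) {
    ref = ApplyBarrier(ref);
  }
  return ref;
}

int main() {
  Object obj{42};
  Object* root = &obj;
  Object* a = BarrierForRoot(&root);                       // barrier applied
  Object* b = BarrierForRoot<kWithoutReadBarrier>(&root);  // no barrier
  std::cout << a->value + b->value << "\n";
  return 0;
}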
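Note on the stack.cc/stack.h hunks: GetVReg/SetVReg and the private GPR/FPR accessors now return bool and pass the value through an out-parameter, while a CHECK-ing convenience overload of GetVReg keeps the old value-returning shape for callers that cannot handle a failed read. A standalone sketch of that fallible-accessor pattern, using toy types rather than the ART StackVisitor/Context classes:

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <map>

// Toy register file standing in for a thread context: a read can fail if
// the frame did not spill the requested register.
class ToyFrame {
 public:
  void Spill(uint32_t reg, uint32_t value) { regs_[reg] = value; }

  // Fallible accessor: reports success and writes the value to *val.
  bool GetVReg(uint32_t reg, uint32_t* val) const {
    auto it = regs_.find(reg);
    if (it == regs_.end()) {
      return false;  // register not available in this frame
    }
    *val = it->second;
    return true;
  }

  // Convenience overload mirroring the CHECK-ing wrapper in the change:
  // aborts if the read fails, so existing callers keep their old shape.
  uint32_t GetVReg(uint32_t reg) const {
    uint32_t val = 0;
    if (!GetVReg(reg, &val)) {
      std::cerr << "Failed to read vreg " << reg << "\n";
      std::abort();
    }
    return val;
  }

 private:
  std::map<uint32_t, uint32_t> regs_;
};

int main() {
  ToyFrame frame;
  frame.Spill(3, 7);

  uint32_t val = 0;
  if (frame.GetVReg(3, &val)) {  // new-style call: failure is handled
    std::cout << "v3 = " << val << "\n";
  }
  std::cout << "v3 = " << frame.GetVReg(3) << "\n";  // old-style call
  return 0;
}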
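Note on the verifier hunks: the static argument-count check for possibly-unresolved methods derives the expected register count from the method's shorty, where 'J' (long) and 'D' (double) each occupy two registers and a non-static call adds one register for 'this'. A small standalone sketch of that counting rule; this is a plain function written for illustration, not the MethodVerifier code.

#include <cstdint>
#include <cstring>
#include <iostream>

// Count how many argument registers a call needs, given a dex-style shorty
// (return type first, then one char per parameter) and whether the call is
// static. Longs and doubles take two registers each.
int32_t CountSigRegisters(const char* shorty, bool is_static) {
  int32_t sig_registers = is_static ? 0 : 1;  // non-static: one for 'this'
  for (size_t i = 1; i < strlen(shorty); ++i) {
    if (shorty[i] == 'J' || shorty[i] == 'D') {
      sig_registers += 2;
    } else {
      sig_registers += 1;
    }
  }
  return sig_registers;
}

int main() {
  // static long max(long a, long b) has shorty "JJJ": 2 + 2 = 4 registers.
  std::cout << CountSigRegisters("JJJ", /*is_static=*/true) << "\n";   // 4
  // virtual int f(int, double) has shorty "IID": this + 1 + 2 = 4.
  std::cout << CountSigRegisters("IID", /*is_static=*/false) << "\n";  // 4
  return 0;
}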
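Note on VerifyInvocationArgsFromIterator: it is a template so the same per-argument loop can walk either a DexFileParameterIterator (the proto of an unresolved method) or the new MethodParamListDescriptorIterator (a resolved ArtMethod's parameter list); the only contract it needs is HasNext()/Next()/GetDescriptor(). A standalone sketch of that duck-typed iterator contract with a toy vector-backed iterator; nothing here is ART code.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy iterator satisfying the same implicit interface the verifier
// template relies on: HasNext(), Next(), GetDescriptor().
class VectorDescriptorIterator {
 public:
  explicit VectorDescriptorIterator(std::vector<std::string> descriptors)
      : descriptors_(std::move(descriptors)), pos_(0) {}
  bool HasNext() const { return pos_ < descriptors_.size(); }
  void Next() { ++pos_; }
  const char* GetDescriptor() const { return descriptors_[pos_].c_str(); }

 private:
  std::vector<std::string> descriptors_;
  size_t pos_;
};

// Sketch of the template shape: any iterator with the three methods above
// can drive the same per-parameter loop.
template <class T>
int CheckArgsFromIterator(T* it) {
  int sig_registers = 0;
  for (; it->HasNext(); it->Next()) {
    const char* descriptor = it->GetDescriptor();
    // Wide primitives ('J' long, 'D' double) occupy two registers.
    sig_registers += (descriptor[0] == 'J' || descriptor[0] == 'D') ? 2 : 1;
  }
  return sig_registers;
}

int main() {
  VectorDescriptorIterator it({"I", "D", "Ljava/lang/String;"});
  std::cout << CheckArgsFromIterator(&it) << "\n";  // 1 + 2 + 1 = 4
  return 0;
}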