Merge "Relax limits for profiler options"
diff --git a/Android.mk b/Android.mk
index a30c090..f7f65ac 100644
--- a/Android.mk
+++ b/Android.mk
@@ -97,7 +97,7 @@
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
 ART_HOST_DEPENDENCIES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar
-ART_HOST_DEPENDENCIES += $(HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+ART_HOST_DEPENDENCIES += $(HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
 ifdef TARGET_2ND_ARCH
 ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
@@ -166,7 +166,7 @@
 	@echo test-art-host-interpreter PASSED
 
 .PHONY: test-art-host-dependencies
-test-art-host-dependencies: $(ART_HOST_TEST_DEPENDENCIES) $(HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) $(HOST_CORE_DEX_LOCATIONS)
+test-art-host-dependencies: $(ART_HOST_TEST_DEPENDENCIES) $(HOST_LIBRARY_PATH)/libarttest$(ART_HOST_SHLIB_EXTENSION) $(HOST_CORE_DEX_LOCATIONS)
 
 .PHONY: test-art-host-gtest
 test-art-host-gtest: $(ART_HOST_GTEST_TARGETS)
diff --git a/build/Android.common.mk b/build/Android.common.mk
index aa167b2..09f34b3 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -34,10 +34,14 @@
 #
 ART_BUILD_TARGET_NDEBUG ?= true
 ART_BUILD_TARGET_DEBUG ?= true
-ART_BUILD_HOST_NDEBUG ?= $(WITH_HOST_DALVIK)
-ART_BUILD_HOST_DEBUG ?= $(WITH_HOST_DALVIK)
+ART_BUILD_HOST_NDEBUG ?= true
+ART_BUILD_HOST_DEBUG ?= true
 
+ifeq ($(HOST_PREFER_32_BIT),true)
+ART_HOST_ARCH := $(HOST_2ND_ARCH)
+else
 ART_HOST_ARCH := $(HOST_ARCH)
+endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),false)
 $(info Disabling ART_BUILD_TARGET_NDEBUG)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b07e4f8..4c2cda4 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -25,6 +25,7 @@
 	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
 	runtime/base/mutex_test.cc \
+	runtime/base/scoped_flock_test.cc \
 	runtime/base/timing_logger_test.cc \
 	runtime/base/unix_file/fd_file_test.cc \
 	runtime/base/unix_file/mapped_file_test.cc \
@@ -33,9 +34,11 @@
 	runtime/base/unix_file/string_file_test.cc \
 	runtime/class_linker_test.cc \
 	runtime/dex_file_test.cc \
+	runtime/dex_file_verifier_test.cc \
 	runtime/dex_instruction_visitor_test.cc \
 	runtime/dex_method_iterator_test.cc \
 	runtime/entrypoints/math_entrypoints_test.cc \
+	runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
 	runtime/entrypoints_order_test.cc \
 	runtime/exception_test.cc \
 	runtime/gc/accounting/space_bitmap_test.cc \
@@ -80,6 +83,7 @@
 	compiler/optimizing/codegen_test.cc \
 	compiler/optimizing/dominator_test.cc \
 	compiler/optimizing/find_loops_test.cc \
+	compiler/optimizing/graph_test.cc \
 	compiler/optimizing/linearize_test.cc \
 	compiler/optimizing/liveness_test.cc \
 	compiler/optimizing/live_interval_test.cc \
@@ -247,11 +251,9 @@
   $(foreach file,$(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval $(call build-art-test,target,$(file),,)))
   $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call build-art-test,target,$(file),art/compiler,libartd-compiler)))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST),true)
-    $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),,)))
-    $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
-  endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),,)))
+  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
 endif
 
 # Used outside the art project to get a list of the current tests
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index b4c99b5..76e5af0 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -74,8 +74,6 @@
 ifeq ($(ART_BUILD_TARGET),true)
   $(eval $(call build-libarttest,target))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST),true)
-    $(eval $(call build-libarttest,host))
-  endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libarttest,host))
 endif
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index bf07ecc..fbb7eb3 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -74,7 +74,6 @@
 endif # ART_BUILD_HOST
 
 # If we aren't building the host toolchain, skip building the target core.art.
-ifeq ($(WITH_HOST_DALVIK),true)
 ifeq ($(ART_BUILD_TARGET),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := core.art
@@ -84,4 +83,3 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += $(TARGET_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
 endif # ART_BUILD_TARGET
-endif # WITH_HOST_DALVIK
diff --git a/compiler/Android.mk b/compiler/Android.mk
index cfce9f7..6d2f5d1 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -48,6 +48,7 @@
 	dex/quick/mips/utility_mips.cc \
 	dex/quick/mir_to_lir.cc \
 	dex/quick/ralloc_util.cc \
+	dex/quick/resource_mask.cc \
 	dex/quick/x86/assemble_x86.cc \
 	dex/quick/x86/call_x86.cc \
 	dex/quick/x86/fp_x86.cc \
@@ -83,6 +84,7 @@
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_x86.cc \
+	optimizing/code_generator_x86_64.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
@@ -91,6 +93,7 @@
 	optimizing/register_allocator.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
+	optimizing/ssa_type_propagation.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
@@ -272,14 +275,12 @@
 
 endef
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart-compiler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart-compiler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart-compiler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart-compiler,host,debug))
 endif
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
   $(eval $(call build-libart-compiler,target,ndebug))
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb48cc3..55a4c78 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -48,10 +48,16 @@
   kArg1,
   kArg2,
   kArg3,
+  kArg4,
+  kArg5,
   kFArg0,
   kFArg1,
   kFArg2,
   kFArg3,
+  kFArg4,
+  kFArg5,
+  kFArg6,
+  kFArg7,
   kRet0,
   kRet1,
   kInvokeTgt,
@@ -76,22 +82,6 @@
   kDead,
 };
 
-/*
- * Def/Use encoding in 64-bit use_mask/def_mask.  Low positions used for target-specific
- * registers (and typically use the register number as the position).  High positions
- * reserved for common and abstract resources.
- */
-
-enum ResourceEncodingPos {
-  kMustNotAlias = 63,
-  kHeapRef = 62,          // Default memory reference type.
-  kLiteral = 61,          // Literal pool memory reference.
-  kDalvikReg = 60,        // Dalvik v_reg memory reference.
-  kFPStatus = 59,
-  kCCode = 58,
-  kLowestCommonResource = kCCode
-};
-
 // Shared pseudo opcodes - must be < 0.
 enum LIRPseudoOpcode {
   kPseudoExportedPC = -16,
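
The removed ResourceEncodingPos enum packed the common resources (condition codes, FP status, Dalvik vregs, literal pool, heap refs) into the high bits of a single 64-bit use/def mask; this patch moves that encoding behind the ResourceMask type (dex/quick/resource_mask.cc in the build changes earlier). A minimal sketch of such a two-word mask, assuming the member layout and exact bit positions, which the patch itself does not show:

    #include <cstddef>
    #include <cstdint>

    // Sketch only: a two-word resource mask in the spirit of ResourceMask.
    // Bit positions and member names are assumptions, not the patch's API.
    class ResourceMaskSketch {
     public:
      // Abstract resources live in the high positions, as in the removed enum.
      enum : size_t { kCCode = 122, kFPStatus = 123, kDalvikReg = 124,
                      kLiteral = 125, kHeapRef = 126, kMustNotAlias = 127 };

      constexpr ResourceMaskSketch() : lo_(0u), hi_(0u) {}

      static constexpr ResourceMaskSketch Bit(size_t bit) {
        return ResourceMaskSketch(bit < 64 ? UINT64_C(1) << bit : UINT64_C(0),
                                  bit >= 64 ? UINT64_C(1) << (bit - 64) : UINT64_C(0));
      }
      bool HasBit(size_t bit) const {
        return (((bit < 64 ? lo_ : hi_) >> (bit & 63)) & 1u) != 0u;
      }
      bool Intersects(const ResourceMaskSketch& other) const {
        return (lo_ & other.lo_) != 0u || (hi_ & other.hi_) != 0u;
      }
      void SetBits(const ResourceMaskSketch& other) { lo_ |= other.lo_; hi_ |= other.hi_; }
      void SetBit(size_t bit) { SetBits(Bit(bit)); }

     private:
      constexpr ResourceMaskSketch(uint64_t lo, uint64_t hi) : lo_(lo), hi_(hi) {}
      uint64_t lo_;
      uint64_t hi_;
    };
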
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5b9c763..89b1a75 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -143,13 +143,25 @@
     Instruction::MOVE,
     Instruction::MOVE_FROM16,
     Instruction::MOVE_16,
+    Instruction::MOVE_WIDE,
+    Instruction::MOVE_WIDE_FROM16,
+    Instruction::MOVE_WIDE_16,
+    Instruction::MOVE_OBJECT,
+    Instruction::MOVE_OBJECT_FROM16,
+    Instruction::MOVE_OBJECT_16,
     Instruction::MOVE_EXCEPTION,
     Instruction::RETURN_VOID,
     Instruction::RETURN,
     Instruction::RETURN_WIDE,
+    Instruction::RETURN_OBJECT,
     Instruction::CONST_4,
     Instruction::CONST_16,
     Instruction::CONST,
+    Instruction::CONST_HIGH16,
+    Instruction::CONST_WIDE_16,
+    Instruction::CONST_WIDE_32,
+    Instruction::CONST_WIDE,
+    Instruction::CONST_WIDE_HIGH16,
     Instruction::CONST_STRING,
     Instruction::MONITOR_ENTER,
     Instruction::MONITOR_EXIT,
@@ -157,6 +169,13 @@
     Instruction::GOTO,
     Instruction::GOTO_16,
     Instruction::GOTO_32,
+    Instruction::PACKED_SWITCH,
+    Instruction::SPARSE_SWITCH,
+    Instruction::CMPL_FLOAT,
+    Instruction::CMPG_FLOAT,
+    Instruction::CMPL_DOUBLE,
+    Instruction::CMPG_DOUBLE,
+    Instruction::CMP_LONG,
     Instruction::IF_EQ,
     Instruction::IF_NE,
     Instruction::IF_LT,
@@ -224,117 +243,32 @@
     Instruction::SHL_INT_LIT8,
     Instruction::SHR_INT_LIT8,
     Instruction::USHR_INT_LIT8,
-    // TODO(Arm64): Enable compiler pass
-    // ----- ExtendedMIROpcode -----
-    kMirOpPhi,
-    kMirOpCopy,
-    kMirOpFusedCmplFloat,
-    kMirOpFusedCmpgFloat,
-    kMirOpFusedCmplDouble,
-    kMirOpFusedCmpgDouble,
-    kMirOpFusedCmpLong,
-    kMirOpNop,
-    kMirOpNullCheck,
-    kMirOpRangeCheck,
-    kMirOpDivZeroCheck,
-    kMirOpCheck,
-    kMirOpCheckPart2,
-    kMirOpSelect,
-
-#if ARM64_USE_EXPERIMENTAL_OPCODES
-    Instruction::MOVE_WIDE,
-    Instruction::MOVE_WIDE_FROM16,
-    Instruction::MOVE_WIDE_16,
-    Instruction::MOVE_OBJECT,
-    Instruction::MOVE_OBJECT_FROM16,
-    Instruction::MOVE_OBJECT_16,
-    // Instruction::PACKED_SWITCH,
-    // Instruction::SPARSE_SWITCH,
-    // Instruction::MOVE_RESULT,
-    // Instruction::MOVE_RESULT_WIDE,
-    // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::RETURN_OBJECT,
-    // Instruction::CONST_HIGH16,
-    // Instruction::CONST_WIDE_16,
-    // Instruction::CONST_WIDE_32,
-    // Instruction::CONST_WIDE,
-    // Instruction::CONST_WIDE_HIGH16,
-    // Instruction::CONST_STRING_JUMBO,
-    // Instruction::CONST_CLASS,
-    // Instruction::CHECK_CAST,
-    // Instruction::INSTANCE_OF,
-    // Instruction::ARRAY_LENGTH,
-    // Instruction::NEW_INSTANCE,
-    // Instruction::NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY_RANGE,
-    // Instruction::FILL_ARRAY_DATA,
+    Instruction::SGET,
+    Instruction::SGET_BOOLEAN,
+    Instruction::SGET_BYTE,
+    Instruction::SGET_CHAR,
+    Instruction::SGET_SHORT,
+    Instruction::SGET_OBJECT,
+    Instruction::SPUT,
+    Instruction::SPUT_OBJECT,
+    Instruction::SPUT_BOOLEAN,
+    Instruction::SPUT_BYTE,
+    Instruction::SPUT_CHAR,
+    Instruction::SPUT_SHORT,
     Instruction::CMPL_FLOAT,
     Instruction::CMPG_FLOAT,
-    Instruction::CMPL_DOUBLE,
-    Instruction::CMPG_DOUBLE,
-    Instruction::CMP_LONG,
-    // Instruction::UNUSED_3E,
-    // Instruction::UNUSED_3F,
-    // Instruction::UNUSED_40,
-    // Instruction::UNUSED_41,
-    // Instruction::UNUSED_42,
-    // Instruction::UNUSED_43,
-    // Instruction::AGET,
-    // Instruction::AGET_WIDE,
-    // Instruction::AGET_OBJECT,
-    // Instruction::AGET_BOOLEAN,
-    // Instruction::AGET_BYTE,
-    // Instruction::AGET_CHAR,
-    // Instruction::AGET_SHORT,
-    // Instruction::APUT,
-    // Instruction::APUT_WIDE,
-    // Instruction::APUT_OBJECT,
-    // Instruction::APUT_BOOLEAN,
-    // Instruction::APUT_BYTE,
-    // Instruction::APUT_CHAR,
-    // Instruction::APUT_SHORT,
-    // Instruction::IGET,
-    // Instruction::IGET_WIDE,
-    // Instruction::IGET_OBJECT,
-    // Instruction::IGET_BOOLEAN,
-    // Instruction::IGET_BYTE,
-    // Instruction::IGET_CHAR,
-    // Instruction::IGET_SHORT,
-    // Instruction::IPUT,
-    // Instruction::IPUT_WIDE,
-    // Instruction::IPUT_OBJECT,
-    // Instruction::IPUT_BOOLEAN,
-    // Instruction::IPUT_BYTE,
-    // Instruction::IPUT_CHAR,
-    // Instruction::IPUT_SHORT,
-    Instruction::SGET,
-    // Instruction::SGET_WIDE,
-    Instruction::SGET_OBJECT,
-    // Instruction::SGET_BOOLEAN,
-    // Instruction::SGET_BYTE,
-    // Instruction::SGET_CHAR,
-    // Instruction::SGET_SHORT,
-    Instruction::SPUT,
-    // Instruction::SPUT_WIDE,
-    // Instruction::SPUT_OBJECT,
-    // Instruction::SPUT_BOOLEAN,
-    // Instruction::SPUT_BYTE,
-    // Instruction::SPUT_CHAR,
-    // Instruction::SPUT_SHORT,
-    Instruction::INVOKE_VIRTUAL,
-    Instruction::INVOKE_SUPER,
-    Instruction::INVOKE_DIRECT,
-    Instruction::INVOKE_STATIC,
-    Instruction::INVOKE_INTERFACE,
-    // Instruction::RETURN_VOID_BARRIER,
-    // Instruction::INVOKE_VIRTUAL_RANGE,
-    // Instruction::INVOKE_SUPER_RANGE,
-    // Instruction::INVOKE_DIRECT_RANGE,
-    // Instruction::INVOKE_STATIC_RANGE,
-    // Instruction::INVOKE_INTERFACE_RANGE,
-    // Instruction::UNUSED_79,
-    // Instruction::UNUSED_7A,
+    Instruction::IGET,
+    Instruction::IGET_OBJECT,
+    Instruction::IGET_BOOLEAN,
+    Instruction::IGET_BYTE,
+    Instruction::IGET_CHAR,
+    Instruction::IGET_SHORT,
+    Instruction::IPUT,
+    Instruction::IPUT_OBJECT,
+    Instruction::IPUT_BOOLEAN,
+    Instruction::IPUT_BYTE,
+    Instruction::IPUT_CHAR,
+    Instruction::IPUT_SHORT,
     Instruction::NEG_LONG,
     Instruction::NOT_LONG,
     Instruction::NEG_DOUBLE,
@@ -361,12 +295,12 @@
     Instruction::SHL_LONG,
     Instruction::SHR_LONG,
     Instruction::USHR_LONG,
-    // Instruction::REM_FLOAT,
+    Instruction::REM_FLOAT,
     Instruction::ADD_DOUBLE,
     Instruction::SUB_DOUBLE,
     Instruction::MUL_DOUBLE,
     Instruction::DIV_DOUBLE,
-    // Instruction::REM_DOUBLE,
+    Instruction::REM_DOUBLE,
     Instruction::ADD_LONG_2ADDR,
     Instruction::SUB_LONG_2ADDR,
     Instruction::MUL_LONG_2ADDR,
@@ -378,12 +312,80 @@
     Instruction::SHL_LONG_2ADDR,
     Instruction::SHR_LONG_2ADDR,
     Instruction::USHR_LONG_2ADDR,
-    // Instruction::REM_FLOAT_2ADDR,
+    Instruction::REM_FLOAT_2ADDR,
     Instruction::ADD_DOUBLE_2ADDR,
     Instruction::SUB_DOUBLE_2ADDR,
     Instruction::MUL_DOUBLE_2ADDR,
     Instruction::DIV_DOUBLE_2ADDR,
-    // Instruction::REM_DOUBLE_2ADDR,
+    Instruction::REM_DOUBLE_2ADDR,
+    // TODO(Arm64): Enable compiler pass
+    // ----- ExtendedMIROpcode -----
+    kMirOpPhi,
+    kMirOpCopy,
+    kMirOpFusedCmplFloat,
+    kMirOpFusedCmpgFloat,
+    kMirOpFusedCmplDouble,
+    kMirOpFusedCmpgDouble,
+    kMirOpFusedCmpLong,
+    kMirOpNop,
+    kMirOpNullCheck,
+    kMirOpRangeCheck,
+    kMirOpDivZeroCheck,
+    kMirOpCheck,
+    kMirOpCheckPart2,
+    kMirOpSelect,
+
+#if ARM64_USE_EXPERIMENTAL_OPCODES
+    // Instruction::MOVE_RESULT,
+    // Instruction::MOVE_RESULT_WIDE,
+    // Instruction::MOVE_RESULT_OBJECT,
+    // Instruction::CONST_STRING_JUMBO,
+    // Instruction::CONST_CLASS,
+    // Instruction::CHECK_CAST,
+    // Instruction::INSTANCE_OF,
+    // Instruction::ARRAY_LENGTH,
+    // Instruction::NEW_INSTANCE,
+    // Instruction::NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY_RANGE,
+    // Instruction::FILL_ARRAY_DATA,
+    // Instruction::UNUSED_3E,
+    // Instruction::UNUSED_3F,
+    // Instruction::UNUSED_40,
+    // Instruction::UNUSED_41,
+    // Instruction::UNUSED_42,
+    // Instruction::UNUSED_43,
+    // Instruction::AGET,
+    // Instruction::AGET_WIDE,
+    // Instruction::AGET_OBJECT,
+    // Instruction::AGET_BOOLEAN,
+    // Instruction::AGET_BYTE,
+    // Instruction::AGET_CHAR,
+    // Instruction::AGET_SHORT,
+    // Instruction::APUT,
+    // Instruction::APUT_WIDE,
+    // Instruction::APUT_OBJECT,
+    // Instruction::APUT_BOOLEAN,
+    // Instruction::APUT_BYTE,
+    // Instruction::APUT_CHAR,
+    // Instruction::APUT_SHORT,
+    // Instruction::IPUT_WIDE,
+    // Instruction::IGET_WIDE,
+    // Instruction::SGET_WIDE,
+    // Instruction::SPUT_WIDE,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    // Instruction::RETURN_VOID_BARRIER,
+    // Instruction::INVOKE_VIRTUAL_RANGE,
+    // Instruction::INVOKE_SUPER_RANGE,
+    // Instruction::INVOKE_DIRECT_RANGE,
+    // Instruction::INVOKE_STATIC_RANGE,
+    // Instruction::INVOKE_INTERFACE_RANGE,
+    // Instruction::UNUSED_79,
+    // Instruction::UNUSED_7A,
     // Instruction::IGET_QUICK,
     // Instruction::IGET_WIDE_QUICK,
     // Instruction::IGET_OBJECT_QUICK,
@@ -706,8 +708,7 @@
 // (ARM64) Current calling conversion only support 32bit softfp
 //         which has problems with long, float, double
 constexpr char arm64_supported_types[] = "ZBSCILVJFD";
-// (x84_64) We still have troubles with compiling longs/doubles/floats
-constexpr char x86_64_supported_types[] = "ZBSCILV";
+constexpr char x86_64_supported_types[] = "ZBSCILVJFD";
 
 // TODO: Remove this when we are able to compile everything.
 static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
@@ -718,7 +719,7 @@
   // 1 is for the return type. Currently, we only accept 2 parameters at the most.
   // (x86_64): For now we have the same limitation. But we might want to split this
   //           check in future into two separate cases for arm64 and x86_64.
-  if (shorty_size > (1 + 2)) {
+  if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
     return false;
   }
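
CanCompileShorty filters methods by their shorty (return type first, then one character per parameter); the hunk above lifts the two-parameter cap for x86-64 while keeping it for ARM64. A small standalone illustration of this kind of filter (the helper below is hypothetical, not the patch's code):

    #include <cstring>

    // Sketch: accept a method only if every shorty character is in the
    // supported set, with an arm64-style parameter-count cap.
    static bool CanCompileShortySketch(const char* shorty, bool cap_at_two_params) {
      static const char kSupported[] = "ZBSCILVJFD";  // mirrors the lists above
      const std::size_t len = std::strlen(shorty);
      if (cap_at_two_params && len > 1u + 2u) {  // 1 return type + at most 2 params
        return false;
      }
      for (std::size_t i = 0; i != len; ++i) {
        if (std::strchr(kSupported, shorty[i]) == nullptr) {
          return false;
        }
      }
      return true;
    }
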
 
@@ -739,7 +740,7 @@
 static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                              CompilationUnit& cu) {
   // There is some limitation with current ARM 64 backend.
-  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
+  if (cu.instruction_set == kArm64) {
     // Check if we can compile the prototype.
     const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
     if (!CanCompileShorty(shorty, cu.instruction_set)) {
@@ -754,7 +755,7 @@
       support_list_size = arraysize(x86_64_support_list);
     }
 
-    for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+    for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
       BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
       if (bb == NULL) continue;
       if (bb->block_type == kDead) continue;
@@ -883,12 +884,12 @@
         (1 << kBBOpt) |
         (1 << kMatch) |
         (1 << kPromoteCompilerTemps));
-  }
-
-  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
-    // TODO(Arm64): enable optimizations once backend is mature enough.
+  } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
+  } else if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
     cu.enable_debug |= (1 << kDebugCodegenDump);
   }
 
@@ -956,6 +957,10 @@
 
   CompiledMethod* result = NULL;
 
+  if (cu.mir_graph->PuntToInterpreter()) {
+    return NULL;
+  }
+
   cu.cg->Materialize();
 
   cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
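
CanCompileMethod walks every basic block and rejects the method if any opcode falls outside the support list selected above (arm64_support_list or x86_64_support_list). A minimal sketch of that membership test, using a hash set for clarity where the real code scans the array of support_list_size entries:

    #include <cstdint>
    #include <unordered_set>
    #include <vector>

    // Sketch: reject a method if any of its opcodes is unsupported.
    static bool AllOpcodesSupported(const std::vector<uint16_t>& method_opcodes,
                                    const uint16_t* support_list,
                                    size_t support_list_size) {
      std::unordered_set<uint16_t> supported(support_list,
                                             support_list + support_list_size);
      for (uint16_t opcode : method_opcodes) {
        if (supported.find(opcode) == supported.end()) {
          return false;  // Punt this method back to the interpreter.
        }
      }
      return true;
    }
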
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 2ec17de..1350665 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1011,7 +1011,7 @@
   }
 
   // Contains a pattern we don't want to compile?
-  if (punt_to_interpreter_) {
+  if (PuntToInterpreter()) {
     return true;
   }
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index a2676c8..63a5570 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -586,7 +586,7 @@
   if (current_method_ == 0) {
     DCHECK(entry_block_ == NULL);
     DCHECK(exit_block_ == NULL);
-    DCHECK_EQ(num_blocks_, 0);
+    DCHECK_EQ(num_blocks_, 0U);
     // Use id 0 to represent a null block.
     BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++);
     DCHECK_EQ(null_block->id, NullBasicBlockId);
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b6cec66..15c0aa4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -587,7 +587,7 @@
     return m_units_[m_unit_index]->GetCodeItem()->insns_;
   }
 
-  int GetNumBlocks() const {
+  unsigned int GetNumBlocks() const {
     return num_blocks_;
   }
 
@@ -607,7 +607,7 @@
     return exit_block_;
   }
 
-  BasicBlock* GetBasicBlock(int block_id) const {
+  BasicBlock* GetBasicBlock(unsigned int block_id) const {
     return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id);
   }
 
@@ -958,6 +958,14 @@
   bool SetHigh(int index, bool is_high);
   bool SetHigh(int index);
 
+  bool PuntToInterpreter() {
+    return punt_to_interpreter_;
+  }
+
+  void SetPuntToInterpreter(bool val) {
+    punt_to_interpreter_ = val;
+  }
+
   char* GetDalvikDisassembly(const MIR* mir);
   void ReplaceSpecialChars(std::string& str);
   std::string GetSSAName(int ssa_reg);
@@ -1149,7 +1157,7 @@
   ArenaBitVector* try_block_addr_;
   BasicBlock* entry_block_;
   BasicBlock* exit_block_;
-  int num_blocks_;
+  unsigned int num_blocks_;
   const DexFile::CodeItem* current_code_item_;
   GrowableArray<uint16_t> dex_pc_to_block_map_;  // FindBlock lookup cache.
   std::vector<DexCompilationUnit*> m_units_;     // List of methods included in this graph
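
The DCHECK_EQ change in mir_graph.cc above follows from this type change: once num_blocks_ is unsigned, comparing it against a plain 0 mixes signednesses, likely tripping -Wsign-compare once the check macros capture both operands into typed locals. A tiny illustration of that failure mode (an assumption about the macro internals, not shown in the patch):

    void Example() {
      unsigned int num_blocks = 0U;  // num_blocks_ is now unsigned
      int zero = 0;                  // what DCHECK_EQ(num_blocks_, 0) would capture
      // With -Wsign-compare promoted to an error, this mixed comparison is
      // rejected; hence the switch to the unsigned literal 0U above.
      bool equal = num_blocks == zero;
      (void)equal;
    }
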
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 1460ce6..4b2bc4a 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -346,7 +346,7 @@
           if (mir->next != NULL) {
             MIR* mir_next = mir->next;
             // Make sure result of cmp is used by next insn and nowhere else
-            if (IsInstructionIfCcZ(mir->next->dalvikInsn.opcode) &&
+            if (IsInstructionIfCcZ(mir_next->dalvikInsn.opcode) &&
                 (mir->ssa_rep->defs[0] == mir_next->ssa_rep->uses[0]) &&
                 (GetSSAUseCount(mir->ssa_rep->defs[0]) == 1)) {
               mir_next->meta.ccode = ConditionCodeForIfCcZ(mir_next->dalvikInsn.opcode);
@@ -374,12 +374,16 @@
                 default: LOG(ERROR) << "Unexpected opcode: " << opcode;
               }
               mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+              // Copy the SSA information that is relevant.
               mir_next->ssa_rep->num_uses = mir->ssa_rep->num_uses;
               mir_next->ssa_rep->uses = mir->ssa_rep->uses;
               mir_next->ssa_rep->fp_use = mir->ssa_rep->fp_use;
               mir_next->ssa_rep->num_defs = 0;
               mir->ssa_rep->num_uses = 0;
               mir->ssa_rep->num_defs = 0;
+              // Copy in the decoded instruction information for potential SSA re-creation.
+              mir_next->dalvikInsn.vA = mir->dalvikInsn.vB;
+              mir_next->dalvikInsn.vB = mir->dalvikInsn.vC;
             }
           }
           break;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index e32e7cb..6272555 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -109,12 +109,6 @@
   kArmRegEnd   = 48,
 };
 
-#define ENCODE_ARM_REG_LIST(N)      (static_cast<uint64_t>(N))
-#define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
-#define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
-#define ENCODE_ARM_REG_PC           (1ULL << kArmRegPC)
-#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16)
-
 enum ArmNativeRegisterPool {
   r0           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
   r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index a895e6e..5083bbc 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1628,7 +1628,7 @@
   CreateNativeGcMap();
 }
 
-int ArmMir2Lir::GetInsnSize(LIR* lir) {
+size_t ArmMir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 9f9e618..5466abd 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
@@ -86,7 +87,7 @@
   tab_rec->anchor = switch_branch;
   // Needs to use setflags encoding here
   OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   OpCondBranch(kCondNe, target);
 }
 
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 9c801a5..95bcfbd 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -63,7 +63,7 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint64_t GetRegMaskCommon(RegStorage reg);
+    ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
@@ -79,14 +79,15 @@
     int AssignInsnOffsets();
     void AssignOffsets();
     static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
-    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                  ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    uint64_t GetPCUseDefEncoding();
+    ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
@@ -204,6 +205,8 @@
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
     LIR* LoadFPConstantValue(int r_dest, int value);
+    LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
+                              RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg());
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
@@ -217,6 +220,10 @@
     bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
     bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
     void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
+
+    static constexpr ResourceMask GetRegMaskArm(RegStorage reg);
+    static constexpr ResourceMask EncodeArmRegList(int reg_list);
+    static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 4732e52..916c528 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -224,13 +224,13 @@
     bool cheap_false_val = InexpensiveConstantInt(false_val);
     if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
       OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
-      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
       LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
       LoadConstant(rl_result.reg, false_val);
       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
     } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
       OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
-      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
       LIR* it = OpIT(kCondLs, "");
       LoadConstant(rl_result.reg, false_val);
       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
@@ -882,14 +882,14 @@
     }
     FreeTemp(r_tmp_high);  // Now unneeded
 
-    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
     it = OpIT(kCondEq, "T");
     NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
 
   } else {
     NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
     OpRegReg(kOpSub, r_tmp, rl_expected.reg);
-    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
     it = OpIT(kCondEq, "T");
     NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
   }
@@ -907,7 +907,7 @@
   // result := (tmp1 != 0) ? 0 : 1;
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   it = OpIT(kCondUlt, "");
   LoadConstant(rl_result.reg, 0); /* cc */
   FreeTemp(r_tmp);  // Now unneeded.
@@ -971,7 +971,7 @@
 LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
   // Combine sub & test using sub setflags encoding here
   OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   return OpCondBranch(c_code, target);
 }
 
@@ -1004,7 +1004,7 @@
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
   DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = ENCODE_ALL;
+  barrier->u.m.def_mask = &kEncodeAll;
   return ret;
 #else
   return false;
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index bd9c8b4..e1e2d5b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -113,6 +113,7 @@
     case kHiddenArg: res_reg = rs_r12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
@@ -134,30 +135,32 @@
 /*
  * Decode the register id.
  */
-uint64_t ArmMir2Lir::GetRegMaskCommon(RegStorage reg) {
-  uint64_t seed;
-  int shift;
-  int reg_id = reg.GetRegNum();
-  /* Each double register is equal to a pair of single-precision FP registers */
-  if (reg.IsDouble()) {
-    seed = 0x3;
-    reg_id = reg_id << 1;
-  } else {
-    seed = 1;
-  }
-  /* FP register starts at bit position 16 */
-  shift = reg.IsFloat() ? kArmFPReg0 : 0;
-  /* Expand the double register id into single offset */
-  shift += reg_id;
-  return (seed << shift);
+ResourceMask ArmMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
+  return GetRegMaskArm(reg);
 }
 
-uint64_t ArmMir2Lir::GetPCUseDefEncoding() {
-  return ENCODE_ARM_REG_PC;
+constexpr ResourceMask ArmMir2Lir::GetRegMaskArm(RegStorage reg) {
+  return reg.IsDouble()
+      /* Each double register is equal to a pair of single-precision FP registers */
+      ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kArmFPReg0)
+      : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum());
+}
+
+constexpr ResourceMask ArmMir2Lir::EncodeArmRegList(int reg_list) {
+  return ResourceMask::RawMask(static_cast<uint64_t>(reg_list), 0u);
+}
+
+constexpr ResourceMask ArmMir2Lir::EncodeArmRegFpcsList(int reg_list) {
+  return ResourceMask::RawMask(static_cast<uint64_t>(reg_list) << kArmFPReg16, 0u);
+}
+
+ResourceMask ArmMir2Lir::GetPCUseDefEncoding() const {
+  return ResourceMask::Bit(kArmRegPC);
 }
 
 // Thumb2 specific setup.  TODO: inline?:
-void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                          ResourceMask* use_mask, ResourceMask* def_mask) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   DCHECK(!lir->flags.use_def_invalid);
 
@@ -168,70 +171,70 @@
                 REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
                 REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
     if (flags & REG_DEF_SP) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
+      def_mask->SetBit(kArmRegSP);
     }
 
     if (flags & REG_USE_SP) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
+      use_mask->SetBit(kArmRegSP);
     }
 
     if (flags & REG_DEF_LIST0) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+      def_mask->SetBits(EncodeArmRegList(lir->operands[0]));
     }
 
     if (flags & REG_DEF_LIST1) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+      def_mask->SetBits(EncodeArmRegList(lir->operands[1]));
     }
 
     if (flags & REG_DEF_FPCS_LIST0) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+      def_mask->SetBits(EncodeArmRegFpcsList(lir->operands[0]));
     }
 
     if (flags & REG_DEF_FPCS_LIST2) {
       for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
+        SetupRegMask(def_mask, lir->operands[1] + i);
       }
     }
 
     if (flags & REG_USE_PC) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
+      use_mask->SetBit(kArmRegPC);
     }
 
     /* Conservatively treat the IT block */
     if (flags & IS_IT) {
-      lir->u.m.def_mask = ENCODE_ALL;
+      *def_mask = kEncodeAll;
     }
 
     if (flags & REG_USE_LIST0) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+      use_mask->SetBits(EncodeArmRegList(lir->operands[0]));
     }
 
     if (flags & REG_USE_LIST1) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+      use_mask->SetBits(EncodeArmRegList(lir->operands[1]));
     }
 
     if (flags & REG_USE_FPCS_LIST0) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+      use_mask->SetBits(EncodeArmRegFpcsList(lir->operands[0]));
     }
 
     if (flags & REG_USE_FPCS_LIST2) {
       for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
+        SetupRegMask(use_mask, lir->operands[1] + i);
       }
     }
     /* Fixup for kThumbPush/lr and kThumbPop/pc */
     if (opcode == kThumbPush || opcode == kThumbPop) {
-      uint64_t r8Mask = GetRegMaskCommon(rs_r8);
-      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
-        lir->u.m.use_mask &= ~r8Mask;
-        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
-      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
-        lir->u.m.def_mask &= ~r8Mask;
-        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
+      constexpr ResourceMask r8Mask = GetRegMaskArm(rs_r8);
+      if ((opcode == kThumbPush) && (use_mask->Intersects(r8Mask))) {
+        use_mask->ClearBits(r8Mask);
+        use_mask->SetBit(kArmRegLR);
+      } else if ((opcode == kThumbPop) && (def_mask->Intersects(r8Mask))) {
+        def_mask->ClearBits(r8Mask);
+        def_mask->SetBit(kArmRegPC);
       }
     }
     if (flags & REG_DEF_LR) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+      def_mask->SetBit(kArmRegLR);
     }
   }
 }
@@ -485,44 +488,44 @@
   return buf;
 }
 
-void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) {
+void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) {
   char buf[256];
   buf[0] = 0;
 
-  if (mask == ENCODE_ALL) {
+  if (mask.Equals(kEncodeAll)) {
     strcpy(buf, "all");
   } else {
     char num[8];
     int i;
 
     for (i = 0; i < kArmRegEnd; i++) {
-      if (mask & (1ULL << i)) {
+      if (mask.HasBit(i)) {
         snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
 
-    if (mask & ENCODE_CCODE) {
+    if (mask.HasBit(ResourceMask::kCCode)) {
       strcat(buf, "cc ");
     }
-    if (mask & ENCODE_FP_STATUS) {
+    if (mask.HasBit(ResourceMask::kFPStatus)) {
       strcat(buf, "fpcc ");
     }
 
     /* Memory bits */
-    if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
+    if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
       snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
                DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
                DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
-    if (mask & ENCODE_LITERAL) {
+    if (mask.HasBit(ResourceMask::kLiteral)) {
       strcat(buf, "lit ");
     }
 
-    if (mask & ENCODE_HEAP_REF) {
+    if (mask.HasBit(ResourceMask::kHeapRef)) {
       strcat(buf, "heap ");
     }
-    if (mask & ENCODE_MUST_NOT_ALIAS) {
+    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
       strcat(buf, "noalias ");
     }
   }
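
GetRegMaskArm above encodes a double register as two adjacent single-precision bits (TwoBits at reg_num * 2 + kArmFPReg0), replacing the deleted seed/shift arithmetic. A standalone check of that layout, assuming kArmFPReg0 == 16 as in arm_lir.h:

    #include <cstdint>

    // Sketch: a D register aliases a pair of S registers, so its resource mask
    // covers two adjacent bits; singles cover one bit in the FP range.
    constexpr uint64_t FpRegBits(int reg_num, bool is_double, int fp_base) {
      return is_double ? (UINT64_C(3) << (fp_base + reg_num * 2))
                       : (UINT64_C(1) << (fp_base + reg_num));
    }
    static_assert(FpRegBits(1, true, 16) ==
                      (FpRegBits(2, false, 16) | FpRegBits(3, false, 16)),
                  "d1 covers the same resources as s2 and s3");
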
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 86d32f4..61d3d56 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -87,9 +87,9 @@
   if (data_target == NULL) {
     data_target = AddWordData(&literal_list_, value);
   }
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
                           r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
-  SetMemRefType(load_pc_rel, true, kLiteral);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
@@ -670,6 +670,7 @@
     if (data_target == NULL) {
       data_target = AddWideData(&literal_list_, val_lo, val_hi);
     }
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
     if (r_dest.IsFloat()) {
       res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
                    r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
@@ -678,7 +679,6 @@
       res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
                    r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
     }
-    SetMemRefType(res, true, kLiteral);
     AppendLIR(res);
   }
   return res;
@@ -819,6 +819,30 @@
   return store;
 }
 
+// Helper function for LoadBaseDispBody()/StoreBaseDispBody().
+LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
+                                      RegStorage r_src_dest, RegStorage r_work) {
+  DCHECK_EQ(displacement & 3, 0);
+  int encoded_disp = (displacement & 1020) >> 2;  // Within range of the instruction.
+  RegStorage r_ptr = r_base;
+  if ((displacement & ~1020) != 0) {
+    r_ptr = r_work.Valid() ? r_work : AllocTemp();
+    // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
+    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
+  }
+  LIR* lir = nullptr;
+  if (!r_src_dest.IsPair()) {
+    lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
+  } else {
+    lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
+                  encoded_disp);
+  }
+  if ((displacement & ~1020) != 0 && !r_work.Valid()) {
+    FreeTemp(r_ptr);
+  }
+  return lir;
+}
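
The new helper folds the duplicated load/store paths: Thumb2 VLDR/VSTR and LDRD/STRD take an 8-bit immediate scaled by 4, so only displacements up to 1020 fit directly and anything above goes into an extra ADD on the base register. A worked check of the split (standalone, not the patch's code):

    #include <cassert>

    // Sketch: the encodable part is an 8-bit immediate scaled by 4 (0..1020);
    // anything above is added to the base first via OpRegRegImm(kOpAdd, ...).
    void SplitDisp1020(int displacement, int* add_to_base, int* encoded_disp) {
      assert((displacement & 3) == 0);             // word-aligned, per the DCHECK
      *add_to_base = displacement & ~1020;         // 0 means no extra ADD needed
      *encoded_disp = (displacement & 1020) >> 2;  // fits the instruction's imm8
    }

For example, a displacement of 2112 splits into 2048 added to the base and an encoded immediate of 16 (64 >> 2).
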
+
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -836,40 +860,26 @@
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64: {
-      DCHECK_EQ(displacement & 3, 0);
-      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vldrd/kThumb2LdrdI8.
-      RegStorage r_ptr = r_base;
-      if ((displacement & ~1020) != 0) {
-        // For core register load, use the r_dest.GetLow() for the temporary pointer.
-        r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow();
-        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
-        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
-      }
+    case k64:
       if (r_dest.IsFloat()) {
         DCHECK(!r_dest.IsPair());
-        load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
+        load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest);
       } else {
-        load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
-                       encoded_disp);
-      }
-      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
-        FreeTemp(r_ptr);
+        DCHECK(r_dest.IsPair());
+        // Use the r_dest.GetLow() for the temporary pointer if needed.
+        load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow());
       }
       already_generated = true;
       break;
-    }
     case kSingle:
     // Intentional fall-though.
     case k32:
     // Intentional fall-though.
     case kReference:
       if (r_dest.IsFloat()) {
-        opcode = kThumb2Vldrs;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+        DCHECK(r_dest.IsSingle());
+        load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest);
+        already_generated = true;
         break;
       }
       if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
@@ -934,19 +944,15 @@
     } else {
       RegStorage reg_offset = AllocTemp();
       LoadConstant(reg_offset, encoded_disp);
-      if (r_dest.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, reg_offset, r_base);
-        load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
-      } else {
-        load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
-      }
+      DCHECK(!r_dest.IsFloat());
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
       FreeTemp(reg_offset);
     }
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
-  if (r_base == rs_rARM_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rARM_SP);
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
   }
   return load;
@@ -992,28 +998,16 @@
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64: {
-      DCHECK_EQ(displacement & 3, 0);
-      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vstrd/kThumb2StrdI8.
-      RegStorage r_ptr = r_base;
-      if ((displacement & ~1020) != 0) {
-        r_ptr = AllocTemp();
-        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
-        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
-      }
+    case k64:
       if (r_src.IsFloat()) {
         DCHECK(!r_src.IsPair());
-        store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp);
+        store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src);
       } else {
-        store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(),
-                        encoded_disp);
-      }
-      if ((displacement & ~1020) != 0) {
-        FreeTemp(r_ptr);
+        DCHECK(r_src.IsPair());
+        store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src);
       }
       already_generated = true;
       break;
-    }
     case kSingle:
     // Intentional fall-through.
     case k32:
@@ -1021,11 +1015,8 @@
     case kReference:
       if (r_src.IsFloat()) {
         DCHECK(r_src.IsSingle());
-        opcode = kThumb2Vstrs;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+        store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src);
+        already_generated = true;
         break;
       }
       if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
@@ -1073,19 +1064,15 @@
     } else {
       RegStorage r_scratch = AllocTemp();
       LoadConstant(r_scratch, encoded_disp);
-      if (r_src.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, r_scratch, r_base);
-        store = StoreBaseDispBody(r_scratch, 0, r_src, size);
-      } else {
-        store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
-      }
+      DCHECK(!r_src.IsFloat());
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
       FreeTemp(r_scratch);
     }
   }
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses
-  if (r_base == rs_rARM_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rARM_SP);
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
   }
   return store;
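
LoadBaseDispBody/StoreBaseDispBody now key the Dalvik-register annotation off mem_ref_type_ rather than off r_base == SP, and the literal loads earlier set it via ScopedMemRefType. A sketch of that RAII pattern, under the assumption that it saves and restores a member (the patch does not show the class body):

    // Sketch of the ScopedMemRefType idea: temporarily retag the kind of memory
    // the emitted loads/stores touch, restoring the previous tag on scope exit.
    enum class MemRefType { kHeapRef, kLiteral, kDalvikReg };

    class CodegenSketch {
     public:
      class ScopedMemRefTypeSketch {
       public:
        ScopedMemRefTypeSketch(CodegenSketch* cg, MemRefType new_type)
            : cg_(cg), old_type_(cg->mem_ref_type_) {
          cg_->mem_ref_type_ = new_type;
        }
        ~ScopedMemRefTypeSketch() { cg_->mem_ref_type_ = old_type_; }
       private:
        CodegenSketch* const cg_;
        const MemRefType old_type_;
      };

      MemRefType mem_ref_type_ = MemRefType::kHeapRef;
    };
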
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 6a6b0f6..c1ce03d 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -102,17 +102,14 @@
 #define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
 #define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
 
-enum ArmResourceEncodingPos {
-  kArmGPReg0   = 0,
-  kArmRegLR    = 30,
-  kArmRegSP    = 31,
-  kArmFPReg0   = 32,
-  kArmRegEnd   = 64,
+enum Arm64ResourceEncodingPos {
+  kArm64GPReg0   = 0,
+  kArm64RegLR    = 30,
+  kArm64RegSP    = 31,
+  kArm64FPReg0   = 32,
+  kArm64RegEnd   = 64,
 };
 
-#define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
-#define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
-
 #define IS_SIGNED_IMM(size, value) \
   ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
 #define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
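
IS_SIGNED_IMM is the standard two's-complement range test; spelled out as a function for reference:

    #include <cstdint>

    constexpr bool IsSignedImm(int size, int64_t value) {
      return value >= -(INT64_C(1) << (size - 1)) &&
             value < (INT64_C(1) << (size - 1));
    }
    static_assert(IsSignedImm(7, -64) && IsSignedImm(7, 63) && !IsSignedImm(7, 64),
                  "IS_SIGNED_IMM7 accepts exactly [-64, 63]");
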
@@ -212,7 +209,7 @@
 };
 
 #define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
-
+#define ENCODE_NO_EXTEND (EncodeExtend(kA64Uxtx, 0))
 /*
  * The following enum defines the list of supported A64 instructions by the
  * assembler. Their corresponding EncodingMap positions will be defined in
@@ -289,6 +286,7 @@
   kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
   kA64Ldur3rXd,      // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
   kA64Ldxr2rX,       // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
+  kA64Ldaxr2rX,      // ldaxr[1s00100001011111111111] rn[9-5] rt[4-0].
   kA64Lsl3rrr,       // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
   kA64Lsr3rrd,       // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
   kA64Lsr3rrr,       // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
@@ -328,8 +326,9 @@
   kA64Stur3fXd,      // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
   kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
   kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
+  kA64Stlxr3wrX,     // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0].
   kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
@@ -394,9 +393,6 @@
   kFmtSkip,      // Unused field, but continue to next.
 };
 
-// TODO(Arm64): should we get rid of kFmtExtend?
-//   Note: the only instructions that use it (cmp, cmn) are not used themselves.
-
 // Struct used to define the snippet positions for each A64 opcode.
 struct ArmEncodingMap {
   uint32_t wskeleton;
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 4a0c055..9362147 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -268,7 +268,7 @@
                  kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0s, !1w", kFixupNone),
-    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000),
+    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000),
                  kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0S, !1x", kFixupNone),
@@ -386,6 +386,10 @@
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
                  "ldxr", "!0r, [!1X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldaxr", "!0r, [!1X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
@@ -443,11 +447,11 @@
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
                  "ret", "", kFixupNone),
     ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
-                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "rev", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0),
-                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "rev16", "!0r, !1r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
@@ -542,6 +546,10 @@
                  kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
                  "stxr", "!0w, !1r, [!2X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00),
+                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 "stlxr", "!0w, !1r, [!2X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
                  kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
                  kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
@@ -803,7 +811,7 @@
           DCHECK(target_lir);
           CodeOffset pc = lir->offset;
           CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+            ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
           if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
             LOG(FATAL) << "Invalid jump range in kFixupLoad";
@@ -879,7 +887,7 @@
   CreateNativeGcMap();
 }
 
-int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
+size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) {
   ArmOpcode opcode = UNWIDE(lir->opcode);
   DCHECK(!IsPseudoLirOp(opcode));
   return EncodingMap[opcode].size;
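
The new kA64Ldaxr2rX/kA64Stlxr3wrX encodings are the acquire/release flavors of the exclusive load/store pair, which is what an acquire-release compare-and-swap needs. In ordinary C++ terms (illustration only, not the patch's code; this is how such a CAS typically lowers on AArch64):

    #include <atomic>
    #include <cstdint>

    // On AArch64 this typically compiles to a ldaxr / stlxr retry loop,
    // matching the acquire- and release-exclusive encodings added above.
    bool CasAcqRel(std::atomic<int32_t>& word, int32_t expected, int32_t desired) {
      return word.compare_exchange_strong(expected, desired,
                                          std::memory_order_acq_rel,
                                          std::memory_order_acquire);
    }
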
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index d0f8e74..59eec3d 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -19,6 +19,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
@@ -67,7 +68,7 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage r_base = AllocTemp();
+  RegStorage r_base = AllocTempWide();
   // Allocate key and disp temps.
   RegStorage r_key = AllocTemp();
   RegStorage r_disp = AllocTemp();
@@ -94,7 +95,8 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
+  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -104,7 +106,7 @@
 
 
 void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
-                                 RegLocation rl_src) {
+                                   RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
@@ -121,7 +123,7 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage table_base = AllocTemp();
+  RegStorage table_base = AllocTempWide();
   // Materialize a pointer to the switch table
   NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
   int low_key = s4FromSwitchData(&table[2]);
@@ -139,15 +141,17 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
+  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
+  LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
-  RegStorage branch_reg = AllocTemp();
+  RegStorage branch_reg = AllocTempWide();
   LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
+  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
@@ -297,12 +301,14 @@
  * Mark garbage collection card. Skip if the value we're storing is null.
  */
 void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
-  RegStorage reg_card_base = AllocTemp();
+  RegStorage reg_card_base = AllocTempWide();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
+  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
+  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
+                   0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch_over->target = target;
   FreeTemp(reg_card_base);
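
MarkGCCard emits the usual card-table write barrier: skip when the stored value is null, otherwise dirty the card at address >> kCardShift. A compact sketch of the computation (helper names are hypothetical):

    #include <cstdint>

    // Sketch of the barrier: after storing 'stored_value' into the object at
    // 'target_addr', dirty the card covering that address.
    void MarkCard(uint8_t* card_table_base, uintptr_t target_addr,
                  uintptr_t stored_value, unsigned card_shift) {
      if (stored_value == 0) {
        return;  // null store: branch_over skips the barrier entirely
      }
      // Mirrors "strb wB, [xB, xC]": in ART's biased card table, the low byte
      // of the card-table base itself serves as the dirty-card value.
      card_table_base[target_addr >> card_shift] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table_base));
    }
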
@@ -311,62 +317,133 @@
 
 void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   /*
-   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
+   * On entry, x0 to x7 are live.  Let the register allocation
    * mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.  This leaves the utility
-   * code with a single temp: r12.  This should be enough.
+   * expanding the frame or flushing.
+   * Reserve x8 & x9 for temporaries.
    */
   LockTemp(rs_x0);
   LockTemp(rs_x1);
   LockTemp(rs_x2);
   LockTemp(rs_x3);
+  LockTemp(rs_x4);
+  LockTemp(rs_x5);
+  LockTemp(rs_x6);
+  LockTemp(rs_x7);
+  LockTemp(rs_x8);
+  LockTemp(rs_x9);
 
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
   bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                            (static_cast<size_t>(frame_size_) <
-                            Thread::kStackOverflowReservedBytes));
+                              (static_cast<size_t>(frame_size_) <
+                              Thread::kStackOverflowReservedBytes));
+
   NewLIR0(kPseudoMethodEntry);
 
+  const bool large_frame =
+      (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  const int spill_count = num_core_spills_ + num_fp_spills_;
+  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
+  const int frame_size_without_spills = frame_size_ - spill_size;
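+  // The frame is built in two steps: the spill area is reserved and filled first,
+  // then the remainder is claimed alongside the stack overflow check below.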
+
   if (!skip_overflow_check) {
-    LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
-      /* Load stack limit */
-      // TODO(Arm64): fix the line below:
-      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+      if (!large_frame) {
+        // Load stack limit
+        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
+      }
     } else {
+      // TODO(Arm64): Implement implicit checks.
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
-      Load32Disp(rs_rA64_SP, 0, rs_wzr);
-      MarkPossibleStackOverflowException();
+      // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr);
+      // MarkPossibleStackOverflowException();
+      LOG(FATAL) << "Implicit stack overflow checks not implemented.";
     }
-  } else if (frame_size_ > 0) {
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
+  }
+
+  if (frame_size_ > 0) {
+    OpRegImm64(kOpSub, rs_rA64_SP, spill_size);
   }
 
   /* Need to spill any FP regs? */
   if (fp_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+    int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
     SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
   }
 
   /* Spill core callee saves. */
   if (core_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+    int spill_offset = spill_size - kArm64PointerSize*num_core_spills_;
     SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
   }
 
+  if (!skip_overflow_check) {
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
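+      // Slow path: unwind sp_displace_ bytes of frame and tail-call pThrowStackOverflow.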
+      class StackOverflowSlowPath : public LIRSlowPath {
+       public:
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
+              LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr),
+              sp_displace_(sp_displace) {
+        }
+        void Compile() OVERRIDE {
+          m2l_->ResetRegPool();
+          m2l_->ResetDefTracking();
+          GenerateTargetLabel(kPseudoThrowTarget);
+          // Unwinds stack.
+          m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_);
+          m2l_->ClobberCallerSave();
+          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
+          m2l_->LockTemp(rs_x8);
+          m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8);
+          m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
+          m2l_->FreeTemp(rs_x8);
+        }
+
+       private:
+        const size_t sp_displace_;
+      };
+
+      if (large_frame) {
+        // Compare Expected SP against bottom of stack.
+        // Branch to throw target if there is not enough room.
+        OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
+        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+        AddSlowPath(new (arena_) StackOverflowSlowPath(this, branch, spill_size));
+        OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
+      } else {
+        /*
+         * If the frame is small enough we are guaranteed to have enough space that remains to
+         * handle signals on the user stack.
+         * Establishes stack before checks.
+         */
+        OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr);
+        AddSlowPath(new (arena_) StackOverflowSlowPath(this, branch, frame_size_));
+      }
+    } else {
+      OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+    }
+  } else {
+    OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+  }
+
   FlushIns(ArgLocs, rl_method);
 
   FreeTemp(rs_x0);
   FreeTemp(rs_x1);
   FreeTemp(rs_x2);
   FreeTemp(rs_x3);
+  FreeTemp(rs_x4);
+  FreeTemp(rs_x5);
+  FreeTemp(rs_x6);
+  FreeTemp(rs_x7);
+  FreeTemp(rs_x8);
+  FreeTemp(rs_x9);
 }
 
 void Arm64Mir2Lir::GenExitSequence() {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 6251f4f..9a80c69 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -63,7 +63,7 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint64_t GetRegMaskCommon(RegStorage reg);
+    ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
@@ -78,14 +78,15 @@
     int AssignInsnOffsets();
     void AssignOffsets();
     uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
-    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                  ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    uint64_t GetPCUseDefEncoding();
+    ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
@@ -123,6 +124,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    bool GenInlinedAbsLong(CallInfo* info);
     void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
@@ -184,6 +186,7 @@
     LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
     LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value);
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
@@ -201,6 +204,7 @@
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
+    LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
     int EncodeShift(int code, int amount);
     int EncodeExtend(int extend_type, int amount);
@@ -223,6 +227,40 @@
                     bool skip_this);
 
   private:
+    /**
+     * @brief Given register xNN (dNN), returns register wNN (sNN).
+     * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2).
+     * @return A Solo32 with the same register number as the @p reg (e.g. @c w1 or @c s2).
+     * @see As64BitReg
+     */
+    RegStorage As32BitReg(RegStorage reg) {
+      DCHECK(reg.Is64Bit());
+      DCHECK(!reg.IsPair());
+      RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
+                                      reg.GetRawBits() & RegStorage::kRegTypeMask);
+      DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
+                               ->GetReg().GetReg(),
+                ret_val.GetReg());
+      return ret_val;
+    }
+
+    /**
+     * @brief Given register wNN (sNN), returns register xNN (dNN).
+     * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2).
+     * @return A Solo64 with the same register number as the @p reg (e.g. @c x1 or @c d2).
+     * @see As32BitReg
+     */
+    RegStorage As64BitReg(RegStorage reg) {
+      DCHECK(reg.Is32Bit());
+      DCHECK(!reg.IsPair());
+      RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
+                                      reg.GetRawBits() & RegStorage::kRegTypeMask);
+      DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
+                               ->GetReg().GetReg(),
+                ret_val.GetReg());
+      return ret_val;
+    }
+
     LIR* LoadFPConstantValue(int r_dest, int32_t value);
     LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index acc7d17..265e8d2 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -45,6 +45,7 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
+      // TODO: Fix xSELF.
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturn(kFPReg);
@@ -88,8 +89,15 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
-                                              false);
+      // TODO: Fix xSELF.
+      {
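+        // fmod() takes its arguments in d0/d1; load them there directly instead
+        // of going through the generic wide-argument helper.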
+        ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod);
+        RegStorage r_tgt = CallHelperSetup(helper_offset);
+        LoadValueDirectWideFixed(rl_src1, rs_d0);
+        LoadValueDirectWideFixed(rl_src2, rs_d1);
+        ClobberCallerSave();
+        CallHelper(r_tgt, helper_offset, false);
+      }
       rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 0a76b9b..8112c2e 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -77,10 +77,10 @@
   default:
     LOG(FATAL) << "Unexpected case: " << opcode;
   }
-  rl_shift = LoadValueWide(rl_shift, kCoreReg);
+  rl_shift = LoadValue(rl_shift, kCoreReg);
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg);
+  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -361,11 +361,22 @@
   return rl_result;
 }
 
-bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(FATAL);
+bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTargetWide(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage sign_reg = AllocTempWide();
+  // abs(x): y = x >> 63 (arithmetic); abs = (x + y) ^ y.
+  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
+  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
+  OpRegReg(kOpXor, rl_result.reg, sign_reg);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
 
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
+bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+  DCHECK_EQ(cu_->instruction_set, kArm64);
   RegLocation rl_src1 = info->args[0];
   RegLocation rl_src2 = info->args[1];
   rl_src1 = LoadValue(rl_src1, kCoreReg);
@@ -373,61 +384,43 @@
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  // OpIT((is_min) ? kCondGt : kCondLt, "E");
-  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
-  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
-  GenBarrier();
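+  // CSEL: select rl_src1 when the flags satisfy lt (min) / gt (max), else rl_src2.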
+  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
+          rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
   StoreValue(rl_dest, rl_result);
   return true;
 }
 
 bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(WARNING);
-
   RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  rl_src_address = NarrowRegLoc(rl_src_address);  // TODO: ignore high half in info->args[1]?
   RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);  // TODO: kRefReg?
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
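+  // A single LoadBaseDisp covers every size; only the store-back differs below.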
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
   if (size == k64) {
-    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
-    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
-    } else {
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
-    }
     StoreValueWide(rl_dest, rl_result);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
     StoreValue(rl_dest, rl_result);
   }
   return true;
 }
 
 bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(WARNING);
-
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);  // TODO: kRefReg?
+
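+  // Load the value (wide or narrow), then emit a single store below.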
+  RegLocation rl_value;
   if (size == k64) {
-    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
-    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
-    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
+    rl_value = LoadValueWide(rl_src_value, kCoreReg);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
+    rl_value = LoadValue(rl_src_value, kCoreReg);
   }
+  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
   return true;
 }
 
@@ -444,71 +437,30 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(WARNING);
-
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
   RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  // Ignore high half in info->args[3]. TODO: do we really need this?
+  rl_src_offset = NarrowRegLoc(rl_src_offset);
   RegLocation rl_src_expected = info->args[4];  // int, long or Object
   // If is_long, high half is in info->args[5]
   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
   // If is_long, high half is in info->args[7]
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
 
-  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
-  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
-  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
-  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
-  // into the same temps, reducing the number of required temps down to 5. We shall work
-  // around the potentially locked temp by using LR for r_ptr, unconditionally.
-  // TODO: Pass information about the need for more temps to the stack frame generation
-  // code so that we can rely on being able to allocate enough temps.
-  DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
-  MarkTemp(rs_rA64_LR);
-  FreeTemp(rs_rA64_LR);
-  LockTemp(rs_rA64_LR);
-  bool load_early = true;
-  if (is_long) {
-    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
-        rl_src_expected.reg;
-    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
-        rl_src_new_value.reg;
-    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
-    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
-    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
-    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
-
-    if (!expected_is_good_reg && !new_value_is_good_reg) {
-      // None of expected/new_value is non-temp reg, need to load both late
-      load_early = false;
-      // Make sure they are not in the temp regs and the load will not be skipped.
-      if (expected_is_core_reg) {
-        FlushRegWide(rl_src_expected.reg);
-        ClobberSReg(rl_src_expected.s_reg_low);
-        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
-        rl_src_expected.location = kLocDalvikFrame;
-      }
-      if (new_value_is_core_reg) {
-        FlushRegWide(rl_src_new_value.reg);
-        ClobberSReg(rl_src_new_value.s_reg_low);
-        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
-        rl_src_new_value.location = kLocDalvikFrame;
-      }
-    }
-  }
-
-  // Release store semantics, get the barrier out of the way.  TODO: revisit
-  GenMemBarrier(kStoreLoad);
-
+  // Load Object and offset
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
+  RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg);
+
   RegLocation rl_new_value;
-  if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value);
-  } else if (load_early) {
+  RegLocation rl_expected;
+  if (is_long) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
+  } else {
+    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
+    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
   }
 
   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
@@ -516,9 +468,7 @@
     MarkGCCard(rl_new_value.reg, rl_object.reg);
   }
 
-  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-
-  RegStorage r_ptr = rs_rA64_LR;
+  RegStorage r_ptr = AllocTempRef();
   OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
 
   // Free now unneeded rl_object and rl_offset to give more temps.
@@ -527,77 +477,40 @@
   ClobberSReg(rl_offset.s_reg_low);
   FreeTemp(rl_offset.reg);
 
-  RegLocation rl_expected;
-  if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected);
-  } else if (load_early) {
-    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
-  } else {
-    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
-    int low_reg = AllocTemp().GetReg();
-    int high_reg = AllocTemp().GetReg();
-    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
-    rl_expected = rl_new_value;
-  }
-
   // do {
   //   tmp = [r_ptr] - expected;
   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
   // result = tmp != 0;
 
-  RegStorage r_tmp = AllocTemp();
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-
+  RegStorage r_tmp;
   if (is_long) {
-    RegStorage r_tmp_high = AllocTemp();
-    if (!load_early) {
-      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
-    }
-    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
-    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
-    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
-    if (!load_early) {
-      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
-    }
-
-    LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
-    LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
-    NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
-            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
-    LIR* target2 = NewLIR0(kPseudoTargetLabel);
-    branch1->target = target2;
-    branch2->target = target2;
-    FreeTemp(r_tmp_high);  // Now unneeded
-
+    r_tmp = AllocTempWide();
+  } else if (is_object) {
+    r_tmp = AllocTempRef();
   } else {
-    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
-    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
-    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-    // OpIT(kCondEq, "T");
-    NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+    r_tmp = AllocTemp();
   }
 
-  // Still one conditional left from OpIT(kCondEq, "T") from either branch
-  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
-  OpCondBranch(kCondEq, target);
+  LIR* loop = NewLIR0(kPseudoTargetLabel);
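+  // Load-acquire the current value and exit early on a mismatch.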
+  NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg());
+  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
+  LIR* early_exit = OpCondBranch(kCondNe, NULL);
 
-  if (!load_early) {
-    FreeTemp(rl_expected.reg);  // Now unneeded.
-  }
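+  // Store-release the new value; the status result is non-zero if the
+  // exclusive store failed and the loop must retry.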
+  NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(),
+          r_ptr.GetReg());
+  NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
+  OpCondBranch(kCondNe, loop);
 
-  // result := (tmp1 != 0) ? 0 : 1;
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-  // OpIT(kCondUlt, "");
-  LoadConstant(rl_result.reg, 0); /* cc */
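+  // CSINC: result = NE ? 0 : 1, covering both the mismatch exit and a
+  // successful store (status == 0 leaves EQ set).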
+  LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
+  early_exit->target = exit;
+
   FreeTemp(r_tmp);  // Now unneeded.
+  FreeTemp(r_ptr);  // Now unneeded.
 
   StoreValue(rl_dest, rl_result);
 
-  // Now, restore lr to its non-temp status.
-  Clobber(rs_rA64_LR);
-  UnmarkTemp(rs_rA64_LR);
   return true;
 }
 
@@ -640,7 +553,7 @@
 LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
   // Combine sub & test using sub setflags encoding here
   OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   return OpCondBranch(c_code, target);
 }
 
@@ -673,7 +586,7 @@
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
   DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = ENCODE_ALL;
+  barrier->u.m.def_mask = &kEncodeAll;
   return ret;
 #else
   return false;
@@ -1021,7 +934,7 @@
 
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegImm(op, rl_result.reg, rl_src1.reg, val);
+  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
   StoreValueWide(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index b287399..e2846ae 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -127,6 +127,7 @@
     case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
@@ -138,41 +139,43 @@
 /*
  * Decode the register id. This routine makes assumptions on the encoding made by RegStorage.
  */
-uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
+ResourceMask Arm64Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
   // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
 
-  int reg_raw = reg.GetRawBits();
   // Check if the shape mask is zero (i.e. invalid).
   if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
     // The zero register is not a true register. It is just an immediate zero.
-    return 0;
+    return kEncodeNone;
   }
 
-  return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask);
+  return ResourceMask::Bit(
+      // FP register starts at bit position 32.
+      (reg.IsFloat() ? kArm64FPReg0 : 0) + reg.GetRegNum());
 }
 
-uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
+ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const {
   LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
-  return 0ULL;
+  return kEncodeNone;
 }
 
 // Arm64 specific setup.  TODO: inline?
-void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                            ResourceMask* use_mask, ResourceMask* def_mask) {
   DCHECK_EQ(cu_->instruction_set, kArm64);
   DCHECK(!lir->flags.use_def_invalid);
 
   // These flags are somewhat uncommon - bypass if we can.
   if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
     if (flags & REG_DEF_SP) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
+      def_mask->SetBit(kArm64RegSP);
     }
 
     if (flags & REG_USE_SP) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
+      use_mask->SetBit(kArm64RegSP);
     }
 
     if (flags & REG_DEF_LR) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+      def_mask->SetBit(kArm64RegLR);
     }
   }
 }
@@ -509,44 +512,44 @@
   return buf;
 }
 
-void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) {
+void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) {
   char buf[256];
   buf[0] = 0;
 
-  if (mask == ENCODE_ALL) {
+  if (mask.Equals(kEncodeAll)) {
     strcpy(buf, "all");
   } else {
     char num[8];
     int i;
 
-    for (i = 0; i < kArmRegEnd; i++) {
-      if (mask & (1ULL << i)) {
+    for (i = 0; i < kArm64RegEnd; i++) {
+      if (mask.HasBit(i)) {
         snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
 
-    if (mask & ENCODE_CCODE) {
+    if (mask.HasBit(ResourceMask::kCCode)) {
       strcat(buf, "cc ");
     }
-    if (mask & ENCODE_FP_STATUS) {
+    if (mask.HasBit(ResourceMask::kFPStatus)) {
       strcat(buf, "fpcc ");
     }
 
     /* Memory bits */
-    if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
+    if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
       snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
                DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
                DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
-    if (mask & ENCODE_LITERAL) {
+    if (mask.HasBit(ResourceMask::kLiteral)) {
       strcat(buf, "lit ");
     }
 
-    if (mask & ENCODE_HEAP_REF) {
+    if (mask.HasBit(ResourceMask::kHeapRef)) {
       strcat(buf, "heap ");
     }
-    if (mask & ENCODE_MUST_NOT_ALIAS) {
+    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
       strcat(buf, "noalias ");
     }
   }
@@ -813,7 +816,7 @@
       }
     }
   }
-
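+  // No suitable register was found: still initialize the out-parameter.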
+  *op_size = kWord;
   return RegStorage::InvalidReg();
 }
 
@@ -849,6 +852,8 @@
     return;
   }
 
+  // Handle dalvik registers.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
   for (int i = 0; i < cu_->num_ins; i++) {
     PromotionMap* v_map = &promotion_map_[start_vreg + i];
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index d0ab4f6..71e9e95 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -102,9 +102,9 @@
     data_target = AddWordData(&literal_list_, value);
   }
 
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                             r_dest, 0, 0, 0, 0, data_target);
-  SetMemRefType(load_pc_rel, true, kLiteral);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
@@ -129,9 +129,9 @@
   }
 
   DCHECK(RegStorage::IsFloat(r_dest));
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
                             r_dest, 0, 0, 0, 0, data_target);
-  SetMemRefType(load_pc_rel, true, kLiteral);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
@@ -146,7 +146,7 @@
 
 static int CountSetBits(bool is_wide, uint64_t value) {
   return ((is_wide) ?
-          __builtin_popcountl(value) : __builtin_popcount((uint32_t)value));
+          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
 }
 
 /**
@@ -387,11 +387,11 @@
     case kOpRev:
       DCHECK_EQ(shift, 0);
       // Binary, but rm is encoded twice.
-      return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
       break;
     case kOpRevsh:
       // Binary, but rm is encoded twice.
-      return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      return NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
       break;
     case kOp2Byte:
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
@@ -426,8 +426,43 @@
   return NULL;
 }
 
+LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend) {
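+  // Extended-register form of CMN/CMP, as required when the first operand is SP.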
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpCmn:
+      opcode = kA64Cmn3Rre;
+      break;
+    case kOpCmp:
+      opcode = kA64Cmp3Rre;
+      break;
+    default:
+      LOG(FATAL) << "Bad Opcode: " << op;
+      break;
+  }
+
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+    if (kind == kFmtExtend) {
+      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend);
+    }
+  }
+
+  LOG(FATAL) << "Unexpected encoding operand count";
+  return NULL;
+}
+
 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
+  // RegReg operations with SP as the first operand need the extended-register
+  // instruction form. Only CMN and CMP are implemented.
+  if (r_dest_src1 == rs_rA64_SP) {
+    return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND);
+  } else {
+    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
+  }
 }
 
 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -517,8 +552,11 @@
   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
 
-// Should be taking an int64_t value ?
 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
+  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
+}
+
+LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
   LIR* res;
   bool neg = (value < 0);
   int64_t abs_value = (neg) ? -value : value;
@@ -526,17 +564,16 @@
   ArmOpcode alt_opcode = kA64Brk1d;
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
-  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
 
   switch (op) {
     case kOpLsl: {
       // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
-      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
+      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
       // For now, we just use ubfm directly.
-      int max_value = (is_wide) ? 64 : 32;
+      int max_value = (is_wide) ? 63 : 31;
       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
-                     (-value) & (max_value - 1), max_value - value);
+                     (-value) & max_value, max_value - value);
     }
     case kOpLsr:
       return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
@@ -603,11 +640,17 @@
     return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
-    RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, value);
+    RegStorage r_scratch;
+    if (IS_WIDE(wide)) {
+      r_scratch = AllocTempWide();
+      LoadConstantWide(r_scratch, value);
+    } else {
+      r_scratch = AllocTemp();
+      LoadConstant(r_scratch, value);
+    }
     if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
     else
-      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+      res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
     FreeTemp(r_scratch);
     return res;
   }
@@ -632,9 +675,36 @@
     // abs_value is a shifted 12-bit immediate.
     shift = true;
     abs_value >>= 12;
+  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
+    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
+    // This works for both normal registers and SP.
+    // For a frame size == 0x2468, it will be encoded as:
+    //   sub sp, #0x2000
+    //   sub sp, #0x468
+    if (neg) {
+      op = (op == kOpAdd) ? kOpSub : kOpAdd;
+    }
+    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
+    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
+  } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) {
+    // Note: "sub sp, sp, Xm" is not correct on arm64.
+    // We need special instructions for SP.
+    // Also, operations on a 32-bit view of SP should be avoided.
+    DCHECK(IS_WIDE(wide));
+    RegStorage r_tmp = AllocTempWide();
+    OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0);
+    OpRegImm64(op, r_tmp, value);
+    return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0);
   } else {
-    RegStorage r_tmp = AllocTemp();
-    LIR* res = LoadConstant(r_tmp, value);
+    RegStorage r_tmp;
+    LIR* res;
+    if (IS_WIDE(wide)) {
+      r_tmp = AllocTempWide();
+      res = LoadConstantWide(r_tmp, value);
+    } else {
+      r_tmp = AllocTemp();
+      res = LoadConstant(r_tmp, value);
+    }
     OpRegReg(op, r_dest_src1, r_tmp);
     FreeTemp(r_tmp);
     return res;
@@ -683,9 +753,9 @@
       data_target = AddWideData(&literal_list_, val_lo, val_hi);
     }
 
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
     LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                       r_dest.GetReg(), 0, 0, 0, 0, data_target);
-    SetMemRefType(res, true, kLiteral);
     AppendLIR(res);
     return res;
   }
@@ -905,7 +975,8 @@
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
-  if (r_base == rs_rA64_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rA64_SP);
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
   }
   return load;
@@ -986,7 +1057,8 @@
   }
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses.
-  if (r_base == rs_rA64_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rA64_SP);
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
   }
   return store;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 3fbbc4e..ec0fb43 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -74,9 +74,9 @@
 
 void Mir2Lir::MarkSafepointPC(LIR* inst) {
   DCHECK(!inst->flags.use_def_invalid);
-  inst->u.m.def_mask = ENCODE_ALL;
+  inst->u.m.def_mask = &kEncodeAll;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
-  DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL);
+  DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
 }
 
 /* Remove a LIR from the list. */
@@ -108,37 +108,40 @@
 }
 
 void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) {
-  uint64_t *mask_ptr;
-  uint64_t mask = ENCODE_MEM;
   DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE));
   DCHECK(!lir->flags.use_def_invalid);
+  // TODO: Avoid the extra Arena allocation!
+  const ResourceMask** mask_ptr;
+  ResourceMask mask;
   if (is_load) {
     mask_ptr = &lir->u.m.use_mask;
   } else {
     mask_ptr = &lir->u.m.def_mask;
   }
+  mask = **mask_ptr;
   /* Clear out the memref flags */
-  *mask_ptr &= ~mask;
+  mask.ClearBits(kEncodeMem);
   /* ..and then add back the one we need */
   switch (mem_type) {
-    case kLiteral:
+    case ResourceMask::kLiteral:
       DCHECK(is_load);
-      *mask_ptr |= ENCODE_LITERAL;
+      mask.SetBit(ResourceMask::kLiteral);
       break;
-    case kDalvikReg:
-      *mask_ptr |= ENCODE_DALVIK_REG;
+    case ResourceMask::kDalvikReg:
+      mask.SetBit(ResourceMask::kDalvikReg);
       break;
-    case kHeapRef:
-      *mask_ptr |= ENCODE_HEAP_REF;
+    case ResourceMask::kHeapRef:
+      mask.SetBit(ResourceMask::kHeapRef);
       break;
-    case kMustNotAlias:
+    case ResourceMask::kMustNotAlias:
       /* Currently only loads can be marked as kMustNotAlias */
       DCHECK(!(GetTargetInstFlags(lir->opcode) & IS_STORE));
-      *mask_ptr |= ENCODE_MUST_NOT_ALIAS;
+      mask.SetBit(ResourceMask::kMustNotAlias);
       break;
     default:
       LOG(FATAL) << "Oat: invalid memref kind - " << mem_type;
   }
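+  // Intern the new mask so LIR nodes share cached copies instead of owning their own.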
+  *mask_ptr = mask_cache_.GetMask(mask);
 }
 
 /*
@@ -146,7 +149,8 @@
  */
 void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load,
                                       bool is64bit) {
-  SetMemRefType(lir, is_load, kDalvikReg);
+  DCHECK((is_load ? lir->u.m.use_mask : lir->u.m.def_mask)->Intersection(kEncodeMem).Equals(
+      kEncodeDalvikReg));
 
   /*
    * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit
@@ -241,10 +245,10 @@
   }
 
   if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use"));
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.use_mask, "use"));
   }
   if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def"));
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.def_mask, "def"));
   }
 }
 
@@ -794,7 +798,7 @@
     new_label->operands[0] = keyVal;
     new_label->flags.fixup = kFixupLabel;
     DCHECK(!new_label->flags.use_def_invalid);
-    new_label->u.m.def_mask = ENCODE_ALL;
+    new_label->u.m.def_mask = &kEncodeAll;
     InsertLIRAfter(boundary_lir, new_label);
     res = new_label;
   }
@@ -972,7 +976,9 @@
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
       last_lir_insn_(NULL),
-      slow_paths_(arena, 32, kGrowableArraySlowPaths) {
+      slow_paths_(arena, 32, kGrowableArraySlowPaths),
+      mem_ref_type_(ResourceMask::kHeapRef),
+      mask_cache_(arena) {
   // Reserve pointer id 0 for NULL.
   size_t null_idx = WrapPointer(NULL);
   DCHECK_EQ(null_idx, 0U);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 62c81d0..f9081ce 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -44,7 +44,7 @@
   LIR* barrier = NewLIR0(kPseudoBarrier);
   /* Mark all resources as being clobbered */
   DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = ENCODE_ALL;
+  barrier->u.m.def_mask = &kEncodeAll;
 }
 
 void Mir2Lir::GenDivZeroException() {
@@ -447,6 +447,7 @@
     for (int i = 0; i < elems; i++) {
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
     }
@@ -455,7 +456,8 @@
      * this is an uncommon operation and isn't especially performance
      * critical.
      */
-    RegStorage r_src = AllocTemp();
+    // This is addressing the stack, which may be outside the 4GB area.
+    RegStorage r_src = cu_->target64 ? AllocTempWide() : AllocTemp();
     RegStorage r_dst = AllocTemp();
     RegStorage r_idx = AllocTemp();
     RegStorage r_val;
@@ -484,7 +486,12 @@
     // Generate the copy loop.  Going backwards for convenience
     LIR* target = NewLIR0(kPseudoTargetLabel);
     // Copy next element
-    LoadBaseIndexed(r_src, r_idx, r_val, 2, k32);
+    {
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      LoadBaseIndexed(r_src, r_idx, r_val, 2, k32);
+      // NOTE: no Dalvik register annotation; local optimizations are stopped
+      // by the loop boundaries.
+    }
     StoreBaseIndexed(r_dst, r_idx, r_val, 2, k32);
     FreeTemp(r_val);
     OpDecAndBranch(kCondGe, r_idx, target);
@@ -1959,7 +1966,7 @@
 
   switch (opcode) {
     case Instruction::NOT_LONG:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenNotLong(rl_dest, rl_src2);
         return;
       }
@@ -2009,7 +2016,7 @@
       break;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
         return;
       }
@@ -2020,7 +2027,7 @@
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
         return;
       }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 842533b..a90a06e 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -155,7 +155,12 @@
   if (arg0.wide == 0) {
     LoadValueDirectFixed(arg0, TargetReg(kArg0));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    RegStorage r_tmp;
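+    // x86-64 passes a wide argument in a single 64-bit register; other
+    // targets use a register pair.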
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    }
     LoadValueDirectWideFixed(arg0, r_tmp);
   }
   ClobberCallerSave();
@@ -181,7 +186,12 @@
   if (arg1.wide == 0) {
     LoadValueDirectFixed(arg1, TargetReg(kArg1));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    }
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
@@ -277,6 +287,14 @@
     if (arg1.wide == 0) {
       if (cu_->instruction_set == kMips) {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1));
+      } else if (cu_->instruction_set == kArm64) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else if (cu_->instruction_set == kX86_64) {
+        if (arg0.fp) {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0));
+        } else {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1));
+        }
       } else {
         LoadValueDirectFixed(arg1, TargetReg(kArg1));
       }
@@ -290,26 +308,51 @@
         }
         LoadValueDirectWideFixed(arg1, r_tmp);
       } else {
-        RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+        RegStorage r_tmp;
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+        }
         LoadValueDirectWideFixed(arg1, r_tmp);
       }
     }
   } else {
     RegStorage r_tmp;
     if (arg0.fp) {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      }
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      }
     }
     LoadValueDirectWideFixed(arg0, r_tmp);
     if (arg1.wide == 0) {
-      LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      if (cu_->instruction_set == kX86_64) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      }
     } else {
       RegStorage r_tmp;
       if (arg1.fp) {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        }
       } else {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        }
       }
       LoadValueDirectWideFixed(arg1, r_tmp);
     }
@@ -396,7 +439,12 @@
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    }
     LoadValueDirectWideFixed(arg2, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
@@ -466,6 +514,7 @@
    * end up half-promoted.  In those cases, we must flush the promoted
    * half to memory as well.
    */
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   for (int i = 0; i < cu_->num_ins; i++) {
     PromotionMap* v_map = &promotion_map_[start_vreg + i];
     RegStorage reg = GetArgMappingToPhysicalReg(i);
@@ -662,7 +711,7 @@
     case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
       CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
       cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index);
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86) {
         cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg));
       }
       break;
@@ -874,11 +923,17 @@
       } else {
         // kArg2 & rArg3 can safely be used here
         reg = TargetReg(kArg3);
-        Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
+        }
         call_state = next_call_insn(cu_, info, call_state, target_method,
                                     vtable_idx, direct_code, direct_method, type);
       }
-      Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg);
+      {
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+        Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg);
+      }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                   direct_code, direct_method, type);
       next_use++;
@@ -902,12 +957,15 @@
                                     vtable_idx, direct_code, direct_method, type);
       }
       int outs_offset = (next_use + 1) * 4;
-      if (rl_arg.wide) {
-        StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64);
-        next_use += 2;
-      } else {
-        Store32Disp(TargetReg(kSp), outs_offset, arg_reg);
-        next_use++;
+      {
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+        if (rl_arg.wide) {
+          StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64);
+          next_use += 2;
+        } else {
+          Store32Disp(TargetReg(kSp), outs_offset, arg_reg);
+          next_use++;
+        }
       }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                direct_code, direct_method, type);
@@ -971,12 +1029,14 @@
     if (loc.wide) {
       loc = UpdateLocWide(loc);
       if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
       }
       next_arg += 2;
     } else {
       loc = UpdateLoc(loc);
       if ((next_arg >= 3) && (loc.location == kLocPhysReg)) {
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
       next_arg++;
@@ -999,24 +1059,32 @@
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
     OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
-    LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    LIR* ld = nullptr;
+    {
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    }
     // TUNING: loosen barrier
-    ld->u.m.def_mask = ENCODE_ALL;
-    SetMemRefType(ld, true /* is_load */, kDalvikReg);
+    ld->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
     OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
-    LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
-    SetMemRefType(st, false /* is_load */, kDalvikReg);
-    st->u.m.def_mask = ENCODE_ALL;
+    LIR* st = nullptr;
+    {
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    }
+    st->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
   } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
     int current_src_offset = start_offset;
     int current_dest_offset = outs_offset;
 
+    // Only Dalvik regs are accessed in this loop; no next_call_insn() calls.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     while (regs_left_to_pass_via_stack > 0) {
       // This is based on the knowledge that the stack itself is 16-byte aligned.
       bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
@@ -1083,8 +1151,7 @@
             AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
           } else {
             // Set barrier for 128-bit load.
-            SetMemRefType(ld1, true /* is_load */, kDalvikReg);
-            ld1->u.m.def_mask = ENCODE_ALL;
+            ld1->u.m.def_mask = &kEncodeAll;
           }
         }
         if (st1 != nullptr) {
@@ -1094,8 +1161,7 @@
             AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
           } else {
             // Set barrier for 128-bit store.
-            SetMemRefType(st1, false /* is_load */, kDalvikReg);
-            st1->u.m.def_mask = ENCODE_ALL;
+            st1->u.m.def_mask = &kEncodeAll;
           }
         }
 
@@ -1283,6 +1349,9 @@
       RegStorage t_reg = AllocTemp();
       OpRegReg(kOpNeg, t_reg, rl_result.reg);
       OpRegRegReg(kOpAdc, rl_result.reg, rl_result.reg, t_reg);
+    } else if (cu_->instruction_set == kArm64) {
+      OpRegImm(kOpSub, rl_result.reg, 1);
+      OpRegRegImm(kOpLsr, rl_result.reg, rl_result.reg, 31);
     } else {
       DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
       OpRegImm(kOpSub, rl_result.reg, 1);
@@ -1303,6 +1372,11 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == k64) {
     RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg);
+    if (cu_->instruction_set == kArm64) {
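+      // On arm64 a single 64-bit REV reverses all eight bytes at once.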
+      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
+      StoreValueWide(rl_dest, rl_result);
+      return true;
+    }
     RegStorage r_i_low = rl_i.reg.GetLow();
     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
       // First REV shall clobber rl_result.reg.GetReg(), save the value in a temp for the second REV.
@@ -1401,8 +1475,15 @@
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_dest = InlineTargetWide(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegCopyWide(rl_result.reg, rl_src.reg);
-  OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
+
+  if (cu_->instruction_set == kArm64) {
+    // TODO - Can we encode the immediate? Use UBFX otherwise.
+    // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff);
+    return false;
+  } else {
+    OpRegCopyWide(rl_result.reg, rl_src.reg);
+    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
+  }
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 2c8b9b9..6469d9c 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -65,6 +65,7 @@
         OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
       } else {
         // Lives in the frame, need to store.
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32);
       }
       if (!zero_reg.Valid()) {
@@ -90,6 +91,7 @@
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     if (rl_src.ref) {
       LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
     } else {
@@ -123,6 +125,7 @@
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64);
   }
 }
@@ -210,6 +213,7 @@
   ResetDefLoc(rl_dest);
   if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     def_start = last_lir_insn_;
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
@@ -296,6 +300,7 @@
     def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
@@ -323,6 +328,7 @@
   ResetDefLoc(rl_dest);
   if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     LIR *def_start = last_lir_insn_;
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
@@ -358,6 +364,7 @@
     LIR *def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
@@ -391,24 +398,34 @@
   return loc;
 }
 
-// FIXME: will need an update for 64-bit core regs.
 RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
   DCHECK(loc.wide);
   DCHECK(loc.location == kLocPhysReg);
   DCHECK(!loc.reg.IsFloat());
-  if (IsTemp(loc.reg.GetLow())) {
-    Clobber(loc.reg.GetLow());
+
+  if (!loc.reg.IsPair()) {
+    if (IsTemp(loc.reg)) {
+      Clobber(loc.reg);
+    } else {
+      RegStorage temp = AllocTempWide();
+      OpRegCopy(temp, loc.reg);
+      loc.reg = temp;
+    }
   } else {
-    RegStorage temp_low = AllocTemp();
-    OpRegCopy(temp_low, loc.reg.GetLow());
-    loc.reg.SetLowReg(temp_low.GetReg());
-  }
-  if (IsTemp(loc.reg.GetHigh())) {
-    Clobber(loc.reg.GetHigh());
-  } else {
-    RegStorage temp_high = AllocTemp();
-    OpRegCopy(temp_high, loc.reg.GetHigh());
-    loc.reg.SetHighReg(temp_high.GetReg());
+    if (IsTemp(loc.reg.GetLow())) {
+      Clobber(loc.reg.GetLow());
+    } else {
+      RegStorage temp_low = AllocTemp();
+      OpRegCopy(temp_low, loc.reg.GetLow());
+      loc.reg.SetLowReg(temp_low.GetReg());
+    }
+    if (IsTemp(loc.reg.GetHigh())) {
+      Clobber(loc.reg.GetHigh());
+    } else {
+      RegStorage temp_high = AllocTemp();
+      OpRegCopy(temp_high, loc.reg.GetHigh());
+      loc.reg.SetHighReg(temp_high.GetReg());
+    }
   }
 
   // Ensure that this doesn't represent the original SR any more.
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 4a918a1..b97ff2a 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -21,8 +21,8 @@
 #define DEBUG_OPT(X)
 
 /* Check RAW, WAR, and WAW dependencies on the register operands */
-#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \
-                                        ((use | def) & check->u.m.def_mask))
+#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask) || \
+                                        use.Union(def).Intersects(*check->u.m.def_mask))
 
 /* Scheduler heuristics */
 #define MAX_HOIST_DISTANCE 20
@@ -109,20 +109,23 @@
     bool is_this_lir_load = target_flags & IS_LOAD;
     LIR* check_lir;
     /* Use the mem mask to determine the rough memory location */
-    uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM;
+    ResourceMask this_mem_mask = kEncodeMem.Intersection(
+        this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask));
 
     /*
      * Currently only eliminate redundant ld/st for constant and Dalvik
      * register accesses.
      */
-    if (!(this_mem_mask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) {
+    if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) {
       continue;
     }
 
-    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
-    uint64_t stop_use_reg_mask;
+    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
+    ResourceMask stop_use_reg_mask;
     if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-      stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM;
+      // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask.
+      stop_use_reg_mask = ResourceMask::Bit(kIsBranch).Union(*this_lir->u.m.use_mask).Without(
+          kEncodeMem);
     } else {
       /*
        * Add pc to the resource mask to prevent this instruction
@@ -130,7 +133,7 @@
        * region bits since stop_mask is used to check data/control
        * dependencies.
        */
-        stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM;
+      stop_use_reg_mask = GetPCUseDefEncoding().Union(*this_lir->u.m.use_mask).Without(kEncodeMem);
     }
 
     for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
@@ -142,8 +145,9 @@
         continue;
       }
 
-      uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM;
-      uint64_t alias_condition = this_mem_mask & check_mem_mask;
+      ResourceMask check_mem_mask = kEncodeMem.Intersection(
+          check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask));
+      ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask);
       bool stop_here = false;
 
       /*
@@ -153,9 +157,9 @@
       // TUNING: Support instructions with multiple register targets.
       if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) {
         stop_here = true;
-      } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
+      } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
         bool is_check_lir_load = check_flags & IS_LOAD;
-        if  (alias_condition == ENCODE_LITERAL) {
+        if  (alias_condition.Equals(kEncodeLiteral)) {
           /*
            * Should only see literal loads in the instruction
            * stream.
@@ -175,7 +179,7 @@
             }
             NopLIR(check_lir);
           }
-        } else if (alias_condition == ENCODE_DALVIK_REG) {
+        } else if (alias_condition.Equals(kEncodeDalvikReg)) {
           /* Must alias */
           if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
             /* Only optimize compatible registers */
@@ -304,7 +308,7 @@
       continue;
     }
 
-    uint64_t stop_use_all_mask = this_lir->u.m.use_mask;
+    ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask;
 
     if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
       /*
@@ -313,14 +317,14 @@
        * locations are safe to be hoisted. So only mark the heap references
        * conservatively here.
        */
-      if (stop_use_all_mask & ENCODE_HEAP_REF) {
-        stop_use_all_mask |= GetPCUseDefEncoding();
+      if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) {
+        stop_use_all_mask.SetBits(GetPCUseDefEncoding());
       }
     }
 
     /* Similar as above, but just check for pure register dependency */
-    uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM;
-    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
+    ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem);
+    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
 
     int next_slot = 0;
     bool stop_here = false;
@@ -335,22 +339,22 @@
         continue;
       }
 
-      uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM;
-      uint64_t alias_condition = stop_use_all_mask & check_mem_mask;
+      ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem);
+      ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask);
       stop_here = false;
 
       /* Potential WAR alias seen - check the exact relation */
-      if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
+      if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
         /* We can fully disambiguate Dalvik references */
-        if (alias_condition == ENCODE_DALVIK_REG) {
-          /* Must alias or partually overlap */
+        if (alias_condition.Equals(kEncodeDalvikReg)) {
+          /* Must alias or partially overlap */
           if ((check_lir->flags.alias_info == this_lir->flags.alias_info) ||
             IsDalvikRegisterClobbered(this_lir, check_lir)) {
             stop_here = true;
           }
         /* Conservatively treat all heap refs as may-alias */
         } else {
-          DCHECK_EQ(alias_condition, ENCODE_HEAP_REF);
+          DCHECK(alias_condition.Equals(kEncodeHeapRef));
           stop_here = true;
         }
         /* Memory content may be updated. Stop looking now. */
@@ -413,7 +417,7 @@
         LIR* prev_lir = prev_inst_list[slot+1];
 
         /* Check the highest instruction */
-        if (prev_lir->u.m.def_mask == ENCODE_ALL) {
+        if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) {
           /*
            * If the first instruction is a load, don't hoist anything
            * above it since it is unlikely to be beneficial.
@@ -443,7 +447,8 @@
          */
         bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? false :
             (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD);
-        if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) {
+        if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) ||
+            (slot < LD_LATENCY)) {
           break;
         }
       }
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index b26ab57..c7e9190 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -709,7 +709,7 @@
   return res;
 }
 
-int MipsMir2Lir::GetInsnSize(LIR* lir) {
+size_t MipsMir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   return EncodingMap[lir->opcode].size;
 }
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index e1bdb2e..c734202 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -19,6 +19,7 @@
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mips_lir.h"
 
 namespace art {
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index ea3c901..571adac 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -63,7 +63,7 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint64_t GetRegMaskCommon(RegStorage reg);
+    ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
@@ -77,14 +77,15 @@
     int AssignInsnOffsets();
     void AssignOffsets();
     AssemblerStatus AssembleInstructions(CodeOffset start_addr);
-    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                  ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    uint64_t GetPCUseDefEncoding();
+    ResourceMask GetPCUseDefEncoding() const OVERRIDE;
     uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
+    size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
     // Check support for volatile load/store of a given size.
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c1a7c99..76b5243 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -98,6 +98,7 @@
     case kHiddenArg: res_reg = rs_rT0; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = rs_rMIPS_COUNT; break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
@@ -119,60 +120,50 @@
 /*
  * Decode the register id.
  */
-uint64_t MipsMir2Lir::GetRegMaskCommon(RegStorage reg) {
-  uint64_t seed;
-  int shift;
-  int reg_id = reg.GetRegNum();
-  /* Each double register is equal to a pair of single-precision FP registers */
-  if (reg.IsDouble()) {
-    seed = 0x3;
-    reg_id = reg_id << 1;
-  } else {
-    seed = 1;
-  }
-  /* FP register starts at bit position 32 */
-  shift = reg.IsFloat() ? kMipsFPReg0 : 0;
-  /* Expand the double register id into single offset */
-  shift += reg_id;
-  return (seed << shift);
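+// E.g. double reg d1 (reg num 1) yields TwoBits(2 + kMipsFPReg0), covering the f2/f3 pair.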
+ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
+  return reg.IsDouble()
+      /* Each double register is equal to a pair of single-precision FP registers */
+      ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0)
+      : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kMipsFPReg0 : reg.GetRegNum());
 }
 
-uint64_t MipsMir2Lir::GetPCUseDefEncoding() {
-  return ENCODE_MIPS_REG_PC;
+ResourceMask MipsMir2Lir::GetPCUseDefEncoding() const {
+  return ResourceMask::Bit(kMipsRegPC);
 }
 
 
-void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                           ResourceMask* use_mask, ResourceMask* def_mask) {
   DCHECK_EQ(cu_->instruction_set, kMips);
   DCHECK(!lir->flags.use_def_invalid);
 
   // Mips-specific resource map setup here.
   if (flags & REG_DEF_SP) {
-    lir->u.m.def_mask |= ENCODE_MIPS_REG_SP;
+    def_mask->SetBit(kMipsRegSP);
   }
 
   if (flags & REG_USE_SP) {
-    lir->u.m.use_mask |= ENCODE_MIPS_REG_SP;
+    use_mask->SetBit(kMipsRegSP);
   }
 
   if (flags & REG_DEF_LR) {
-    lir->u.m.def_mask |= ENCODE_MIPS_REG_LR;
+    def_mask->SetBit(kMipsRegLR);
   }
 
   if (flags & REG_DEF_HI) {
-    lir->u.m.def_mask |= ENCODE_MIPS_REG_HI;
+    def_mask->SetBit(kMipsRegHI);
   }
 
   if (flags & REG_DEF_LO) {
-    lir->u.m.def_mask |= ENCODE_MIPS_REG_LO;
+    def_mask->SetBit(kMipsRegLO);
   }
 
   if (flags & REG_USE_HI) {
-    lir->u.m.use_mask |= ENCODE_MIPS_REG_HI;
+    use_mask->SetBit(kMipsRegHI);
   }
 
   if (flags & REG_USE_LO) {
-    lir->u.m.use_mask |= ENCODE_MIPS_REG_LO;
+    use_mask->SetBit(kMipsRegLO);
   }
 }
 
@@ -282,43 +273,43 @@
 }
 
 // FIXME: need to redo resource maps for MIPS - fix this at that time
-void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *prefix) {
+void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, const ResourceMask& mask, const char *prefix) {
   char buf[256];
   buf[0] = 0;
 
-  if (mask == ENCODE_ALL) {
+  if (mask.Equals(kEncodeAll)) {
     strcpy(buf, "all");
   } else {
     char num[8];
     int i;
 
     for (i = 0; i < kMipsRegEnd; i++) {
-      if (mask & (1ULL << i)) {
+      if (mask.HasBit(i)) {
         snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
 
-    if (mask & ENCODE_CCODE) {
+    if (mask.HasBit(ResourceMask::kCCode)) {
       strcat(buf, "cc ");
     }
-    if (mask & ENCODE_FP_STATUS) {
+    if (mask.HasBit(ResourceMask::kFPStatus)) {
       strcat(buf, "fpcc ");
     }
     /* Memory bits */
-    if (mips_lir && (mask & ENCODE_DALVIK_REG)) {
+    if (mips_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
       snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
                DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
                DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
     }
-    if (mask & ENCODE_LITERAL) {
+    if (mask.HasBit(ResourceMask::kLiteral)) {
       strcat(buf, "lit ");
     }
 
-    if (mask & ENCODE_HEAP_REF) {
+    if (mask.HasBit(ResourceMask::kHeapRef)) {
       strcat(buf, "heap ");
     }
-    if (mask & ENCODE_MUST_NOT_ALIAS) {
+    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
       strcat(buf, "noalias ");
     }
   }
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 2757b7b..01b25f92 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -534,7 +534,8 @@
     }
   }
 
-  if (r_base == rs_rMIPS_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rMIPS_SP);
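+    // Dalvik vregs live in the frame, so a Dalvik-typed access must be SP-based.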
     AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
                             true /* is_load */, pair /* is64bit */);
     if (pair) {
@@ -634,7 +635,8 @@
     FreeTemp(r_scratch);
   }
 
-  if (r_base == rs_rMIPS_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rMIPS_SP);
     AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
                             false /* is_load */, pair /* is64bit */);
     if (pair) {
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 2f37520..9912101 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -57,7 +57,7 @@
       (opcode == kPseudoExportedPC)) {
     // Always make labels scheduling barriers
     DCHECK(!insn->flags.use_def_invalid);
-    insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL;
+    insn->u.m.use_mask = insn->u.m.def_mask = &kEncodeAll;
   }
   return insn;
 }
@@ -140,19 +140,20 @@
 /*
  * Mark the corresponding bit(s).
  */
-inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) {
+inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) {
   DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0);
   DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg;
-  *mask |= reginfo_map_.Get(reg)->DefUseMask();
+  *mask = mask->Union(reginfo_map_.Get(reg)->DefUseMask());
 }
 
 /*
  * Set up the proper fields in the resource mask
  */
-inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) {
+inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
   int opcode = lir->opcode;
 
   if (IsPseudoLirOp(opcode)) {
+    lir->u.m.use_mask = lir->u.m.def_mask = &kEncodeNone;
     if (opcode != kPseudoBarrier) {
       lir->flags.fixup = kFixupLabel;
     }
@@ -166,13 +167,27 @@
     lir->flags.fixup = kFixupLabel;
   }
 
-  /* Get the starting size of the instruction's template */
+  /* Get the starting size of the instruction's template. */
   lir->flags.size = GetInsnSize(lir);
   estimated_native_code_size_ += lir->flags.size;
-  /* Set up the mask for resources that are updated */
-  if (!leave_mem_ref && (flags & (IS_LOAD | IS_STORE))) {
-    /* Default to heap - will catch specialized classes later */
-    SetMemRefType(lir, flags & IS_LOAD, kHeapRef);
+
+  /* Set up the mask for resources. */
+  ResourceMask use_mask;
+  ResourceMask def_mask;
+
+  if (flags & (IS_LOAD | IS_STORE)) {
+    /* Set memory reference type (defaults to heap, overridden by ScopedMemRefType). */
+    if (flags & IS_LOAD) {
+      use_mask.SetBit(mem_ref_type_);
+    } else {
+      /* Currently only loads can be marked as kMustNotAlias. */
+      DCHECK(mem_ref_type_ != ResourceMask::kMustNotAlias);
+    }
+    if (flags & IS_STORE) {
+      /* Literals cannot be written to. */
+      DCHECK(mem_ref_type_ != ResourceMask::kLiteral);
+      def_mask.SetBit(mem_ref_type_);
+    }
   }
 
   /*
@@ -180,52 +195,55 @@
    * turn will trash everything.
    */
   if (flags & IS_BRANCH) {
-    lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL;
+    lir->u.m.def_mask = lir->u.m.use_mask = &kEncodeAll;
     return;
   }
 
   if (flags & REG_DEF0) {
-    SetupRegMask(&lir->u.m.def_mask, lir->operands[0]);
+    SetupRegMask(&def_mask, lir->operands[0]);
   }
 
   if (flags & REG_DEF1) {
-    SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
+    SetupRegMask(&def_mask, lir->operands[1]);
   }
 
   if (flags & REG_DEF2) {
-    SetupRegMask(&lir->u.m.def_mask, lir->operands[2]);
+    SetupRegMask(&def_mask, lir->operands[2]);
   }
 
   if (flags & REG_USE0) {
-    SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
+    SetupRegMask(&use_mask, lir->operands[0]);
   }
 
   if (flags & REG_USE1) {
-    SetupRegMask(&lir->u.m.use_mask, lir->operands[1]);
+    SetupRegMask(&use_mask, lir->operands[1]);
   }
 
   if (flags & REG_USE2) {
-    SetupRegMask(&lir->u.m.use_mask, lir->operands[2]);
+    SetupRegMask(&use_mask, lir->operands[2]);
   }
 
   if (flags & REG_USE3) {
-    SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
+    SetupRegMask(&use_mask, lir->operands[3]);
   }
 
   if (flags & REG_USE4) {
-    SetupRegMask(&lir->u.m.use_mask, lir->operands[4]);
+    SetupRegMask(&use_mask, lir->operands[4]);
   }
 
   if (flags & SETS_CCODES) {
-    lir->u.m.def_mask |= ENCODE_CCODE;
+    def_mask.SetBit(ResourceMask::kCCode);
   }
 
   if (flags & USES_CCODES) {
-    lir->u.m.use_mask |= ENCODE_CCODE;
+    use_mask.SetBit(ResourceMask::kCCode);
   }
 
   // Handle target-specific actions
-  SetupTargetResourceMasks(lir, flags);
+  SetupTargetResourceMasks(lir, flags, &use_mask, &def_mask);
+
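+  // GetMask() returns a pointer to a shared (often constexpr) mask rather than copying
+  // the 16-byte value into the LIR; see ResourceMaskCache and mask_cache_ in mir_to_lir.h.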
+  lir->u.m.use_mask = mask_cache_.GetMask(use_mask);
+  lir->u.m.def_mask = mask_cache_.GetMask(def_mask);
 }
 
 inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f12b6f..40205ea 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -68,20 +68,52 @@
 
 // TODO: needs revisit for 64-bit.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
-
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }
 
+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, it moves the stack pointer in order to push return.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
+  if (cu_->instruction_set == kX86_64) {
+    RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+    if (!reg_arg.Valid()) {
+      RegStorage new_reg =
+          wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+      LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
+      return new_reg;
+    } else {
+      // Check if we need to copy the arg to a different reg_class.
+      if (!RegClassMatches(reg_class, reg_arg)) {
+        if (wide) {
+          RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+          OpRegCopyWide(new_reg, reg_arg);
+          reg_arg = new_reg;
+        } else {
+          RegStorage new_reg = AllocTypedTemp(false, reg_class);
+          OpRegCopy(new_reg, reg_arg);
+          reg_arg = new_reg;
+        }
+      }
+    }
+    return reg_arg;
+  }
+
+  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
+      RegStorage::InvalidReg();
+
   // If the VR is wide and there is no register for high part, we need to load it.
   if (wide && !reg_arg_high.Valid()) {
     // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
@@ -128,16 +160,24 @@
 }
 
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }
 
+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, it moves the stack pointer in order to push return.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
   if (!rl_dest.wide) {
     RegStorage reg = GetArgMappingToPhysicalReg(in_position);
     if (reg.Valid()) {
@@ -146,6 +186,16 @@
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
     }
   } else {
+    if (cu_->instruction_set == kX86_64) {
+      RegStorage reg = GetArgMappingToPhysicalReg(in_position);
+      if (reg.Valid()) {
+        OpRegCopy(rl_dest.reg, reg);
+      } else {
+        LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
+      }
+      return;
+    }
+
     RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
     RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
 
@@ -1123,7 +1173,7 @@
       head_lir = &block_label_list_[bb->id];
       // Set the first label as a scheduling barrier.
       DCHECK(!head_lir->flags.use_def_invalid);
-      head_lir->u.m.def_mask = ENCODE_ALL;
+      head_lir->u.m.def_mask = &kEncodeAll;
     }
 
     if (opcode == kMirOpCheck) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ed94a8d..9155677 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -23,6 +23,7 @@
 #include "dex/compiler_ir.h"
 #include "dex/reg_storage.h"
 #include "dex/backend.h"
+#include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
 #include "leb128.h"
 #include "safe_map.h"
@@ -136,8 +137,8 @@
 typedef std::vector<uint8_t> CodeBuffer;
 
 struct UseDefMasks {
-  uint64_t use_mask;        // Resource mask for use.
-  uint64_t def_mask;        // Resource mask for def.
+  const ResourceMask* use_mask;        // Resource mask for use.
+  const ResourceMask* def_mask;        // Resource mask for def.
 };
 
 struct AssemblyInfo {
@@ -188,20 +189,6 @@
 #define DECODE_ALIAS_INFO_WIDE(X)       ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0)
 #define ENCODE_ALIAS_INFO(REG, ISWIDE)  (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0))
 
-// Common resource macros.
-#define ENCODE_CCODE            (1ULL << kCCode)
-#define ENCODE_FP_STATUS        (1ULL << kFPStatus)
-
-// Abstract memory locations.
-#define ENCODE_DALVIK_REG       (1ULL << kDalvikReg)
-#define ENCODE_LITERAL          (1ULL << kLiteral)
-#define ENCODE_HEAP_REF         (1ULL << kHeapRef)
-#define ENCODE_MUST_NOT_ALIAS   (1ULL << kMustNotAlias)
-
-#define ENCODE_ALL              (~0ULL)
-#define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
-                                 ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
-
 #define ENCODE_REG_PAIR(low_reg, high_reg) ((low_reg & 0xff) | ((high_reg & 0xff) << 8))
 #define DECODE_REG_PAIR(both_regs, low_reg, high_reg) \
   do { \
@@ -327,7 +314,7 @@
      */
     class RegisterInfo {
      public:
-      RegisterInfo(RegStorage r, uint64_t mask = ENCODE_ALL);
+      RegisterInfo(RegStorage r, const ResourceMask& mask = kEncodeAll);
       ~RegisterInfo() {}
       static void* operator new(size_t size, ArenaAllocator* arena) {
         return arena->Alloc(size, kArenaAllocRegAlloc);
@@ -378,8 +365,8 @@
       RegStorage Partner() { return partner_; }
       void SetPartner(RegStorage partner) { partner_ = partner; }
       int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; }
-      uint64_t DefUseMask() { return def_use_mask_; }
-      void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; }
+      const ResourceMask& DefUseMask() { return def_use_mask_; }
+      void SetDefUseMask(const ResourceMask& def_use_mask) { def_use_mask_ = def_use_mask; }
       RegisterInfo* Master() { return master_; }
       void SetMaster(RegisterInfo* master) {
         master_ = master;
@@ -417,7 +404,7 @@
       bool aliased_;               // Is this the master for other aliased RegisterInfo's?
       RegStorage partner_;         // If wide_value, other reg of pair or self if 64-bit register.
       int s_reg_;                  // Name of live value.
-      uint64_t def_use_mask_;      // Resources for this element.
+      ResourceMask def_use_mask_;  // Resources for this element.
       uint32_t used_storage_;      // 1 bit per 4 bytes of storage. Unused by aliases.
       uint32_t liveness_;          // 1 bit per 4 bytes of storage. Unused by aliases.
       RegisterInfo* master_;       // Pointer to controlling storage mask.
@@ -539,6 +526,26 @@
       LIR* const cont_;
     };
 
+    // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_.
+    class ScopedMemRefType {
+     public:
+      ScopedMemRefType(Mir2Lir* m2l, ResourceMask::ResourceBit new_mem_ref_type)
+          : m2l_(m2l),
+            old_mem_ref_type_(m2l->mem_ref_type_) {
+        m2l_->mem_ref_type_ = new_mem_ref_type;
+      }
+
+      ~ScopedMemRefType() {
+        m2l_->mem_ref_type_ = old_mem_ref_type_;
+      }
+
+     private:
+      Mir2Lir* const m2l_;
+      ResourceMask::ResourceBit old_mem_ref_type_;
+
+      DISALLOW_COPY_AND_ASSIGN(ScopedMemRefType);
+    };
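+    // Typical use, seen throughout this change: loads/stores created inside the
+    // scope are tagged as Dalvik register accesses instead of heap references:
+    //   {
+    //     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    //     Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    //   }  // Previous mem_ref_type_ restored here.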
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
@@ -625,10 +632,10 @@
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
     void MarkSafepointPC(LIR* inst);
-    void SetupResourceMasks(LIR* lir, bool leave_mem_ref = false);
+    void SetupResourceMasks(LIR* lir);
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
-    void SetupRegMask(uint64_t* mask, int reg);
+    void SetupRegMask(ResourceMask* mask, int reg);
     void DumpLIRInsn(LIR* arg, unsigned char* base_addr);
     void DumpPromotionMap();
     void CodegenDump();
@@ -910,13 +917,13 @@
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
     virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
                              uint32_t vtable_idx,
                              uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
                              bool skip_this);
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
                            NextCallInsn next_call_insn,
                            const MethodReference& target_method,
                            uint32_t vtable_idx,
@@ -945,7 +952,7 @@
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
-    bool GenInlinedAbsLong(CallInfo* info);
+    virtual bool GenInlinedAbsLong(CallInfo* info);
     bool GenInlinedAbsFloat(CallInfo* info);
     bool GenInlinedAbsDouble(CallInfo* info);
     bool GenInlinedFloatCvt(CallInfo* info);
@@ -1136,7 +1143,7 @@
     virtual RegLocation LocCReturnDouble() = 0;
     virtual RegLocation LocCReturnFloat() = 0;
     virtual RegLocation LocCReturnWide() = 0;
-    virtual uint64_t GetRegMaskCommon(RegStorage reg) = 0;
+    virtual ResourceMask GetRegMaskCommon(const RegStorage& reg) const = 0;
     virtual void AdjustSpillMask() = 0;
     virtual void ClobberCallerSave() = 0;
     virtual void FreeCallTemps() = 0;
@@ -1147,14 +1154,15 @@
 
     // Required for target - miscellaneous.
     virtual void AssembleLIR() = 0;
-    virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0;
-    virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0;
+    virtual void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) = 0;
+    virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                          ResourceMask* use_mask, ResourceMask* def_mask) = 0;
     virtual const char* GetTargetInstFmt(int opcode) = 0;
     virtual const char* GetTargetInstName(int opcode) = 0;
     virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
-    virtual uint64_t GetPCUseDefEncoding() = 0;
+    virtual ResourceMask GetPCUseDefEncoding() const = 0;
     virtual uint64_t GetTargetInstFlags(int opcode) = 0;
-    virtual int GetInsnSize(LIR* lir) = 0;
+    virtual size_t GetInsnSize(LIR* lir) = 0;
     virtual bool IsUnconditionalBranch(LIR* lir) = 0;
 
     // Check support for volatile load/store of a given size.
@@ -1576,6 +1584,17 @@
     LIR* last_lir_insn_;
 
     GrowableArray<LIRSlowPath*> slow_paths_;
+
+    // The memory reference type for new LIRs.
+    // NOTE: Passing this as an explicit parameter to all functions that directly or indirectly
+    // invoke RawLIR() would clutter the code and reduce readability.
+    ResourceMask::ResourceBit mem_ref_type_;
+
+    // Each resource mask now takes 16 bytes, so having both use/def masks directly in a LIR
+    // would consume 32 bytes per LIR. Instead, the LIR now holds only pointers to the masks
+    // (i.e. 8 bytes on 32-bit arch, 16 bytes on 64-bit arch) and we use ResourceMaskCache
+    // to deduplicate the masks.
+    ResourceMaskCache mask_cache_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index bbeef50..cae59c8 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -38,7 +38,7 @@
   }
 }
 
-Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask)
+Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, const ResourceMask& mask)
   : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r),
     s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr),
     def_end_(nullptr), alias_chain_(nullptr) {
@@ -82,22 +82,22 @@
   }
 
   // Construct the register pool.
-  for (RegStorage reg : core_regs) {
+  for (const RegStorage& reg : core_regs) {
     RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
     core_regs_.Insert(info);
   }
-  for (RegStorage reg : core64_regs) {
+  for (const RegStorage& reg : core64_regs) {
     RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
     core64_regs_.Insert(info);
   }
-  for (RegStorage reg : sp_regs) {
+  for (const RegStorage& reg : sp_regs) {
     RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
     sp_regs_.Insert(info);
   }
-  for (RegStorage reg : dp_regs) {
+  for (const RegStorage& reg : dp_regs) {
     RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
     dp_regs_.Insert(info);
@@ -126,7 +126,7 @@
   }
 
   // Add an entry for InvalidReg with zero'd mask.
-  RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0);
+  RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), kEncodeNone);
   m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg);
 
   // Existence of core64 registers implies wide references.
@@ -734,6 +734,7 @@
         info1 = info2;
       }
       int v_reg = mir_graph_->SRegToVReg(info1->SReg());
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
       StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
     }
   } else {
@@ -741,6 +742,7 @@
     if (info->IsLive() && info->IsDirty()) {
       info->SetIsDirty(false);
       int v_reg = mir_graph_->SRegToVReg(info->SReg());
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
       StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
     }
   }
@@ -752,6 +754,7 @@
   if (info->IsLive() && info->IsDirty()) {
     info->SetIsDirty(false);
     int v_reg = mir_graph_->SRegToVReg(info->SReg());
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord);
   }
 }
diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc
new file mode 100644
index 0000000..17995fb
--- /dev/null
+++ b/compiler/dex/quick/resource_mask.cc
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iomanip>
+
+#include "resource_mask.h"
+
+#include "utils/arena_allocator.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+constexpr ResourceMask kNoRegMasks[] = {
+    kEncodeNone,
+    kEncodeHeapRef,
+    kEncodeLiteral,
+    kEncodeDalvikReg,
+    ResourceMask::Bit(ResourceMask::kFPStatus),
+    ResourceMask::Bit(ResourceMask::kCCode),
+};
+// For a mask with only resource bit b set (b >= 64), the index 127 - b equals CLZ(masks_[1]).
+COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kHeapRef].Equals(
+    kEncodeHeapRef), check_kNoRegMasks_heap_ref_index);
+COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kLiteral].Equals(
+    kEncodeLiteral), check_kNoRegMasks_literal_index);
+COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kDalvikReg].Equals(
+    kEncodeDalvikReg), check_kNoRegMasks_dalvik_reg_index);
+COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kFPStatus].Equals(
+    ResourceMask::Bit(ResourceMask::kFPStatus)), check_kNoRegMasks_fp_status_index);
+COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kCCode].Equals(
+    ResourceMask::Bit(ResourceMask::kCCode)), check_kNoRegMasks_ccode_index);
+
+template <size_t special_bit>
+constexpr ResourceMask OneRegOneSpecial(size_t reg) {
+  return ResourceMask::Bit(reg).Union(ResourceMask::Bit(special_bit));
+}
+
+// NOTE: Working around gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61484 .
+// This should be a two-dimensions array, kSingleRegMasks[][32] and each line should be
+// enclosed in an extra { }. However, gcc issues a bogus "error: array must be initialized
+// with a brace-enclosed initializer" for that, so we flatten this to a one-dimensional array.
+constexpr ResourceMask kSingleRegMasks[] = {
+#define DEFINE_LIST_32(fn) \
+    fn(0), fn(1), fn(2), fn(3), fn(4), fn(5), fn(6), fn(7),           \
+    fn(8), fn(9), fn(10), fn(11), fn(12), fn(13), fn(14), fn(15),     \
+    fn(16), fn(17), fn(18), fn(19), fn(20), fn(21), fn(22), fn(23),   \
+    fn(24), fn(25), fn(26), fn(27), fn(28), fn(29), fn(30), fn(31)
+    // NOTE: Each line is 512B of constant data, 3KiB in total.
+    DEFINE_LIST_32(ResourceMask::Bit),
+    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kHeapRef>),
+    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kLiteral>),
+    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kDalvikReg>),
+    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kFPStatus>),
+    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kCCode>),
+#undef DEFINE_LIST_32
+};
+
+constexpr size_t SingleRegMaskIndex(size_t main_index, size_t sub_index) {
+  return main_index * 32u + sub_index;
+}
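+// E.g. kSingleRegMasks[SingleRegMaskIndex(3, 5)] is row 3 (the kDalvikReg row), register 5,
+// i.e. OneRegOneSpecial<ResourceMask::kDalvikReg>(5).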
+
+// For a mask with only resource bit b set (b >= 64), the index 127 - b equals CLZ(masks_[1]).
+COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kHeapRef, 0)].Equals(
+    OneRegOneSpecial<ResourceMask::kHeapRef>(0)), check_kSingleRegMasks_heap_ref_index);
+COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kLiteral, 0)].Equals(
+    OneRegOneSpecial<ResourceMask::kLiteral>(0)), check_kSingleRegMasks_literal_index);
+COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kDalvikReg, 0)].Equals(
+    OneRegOneSpecial<ResourceMask::kDalvikReg>(0)), check_kSingleRegMasks_dalvik_reg_index);
+COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kFPStatus, 0)].Equals(
+    OneRegOneSpecial<ResourceMask::kFPStatus>(0)), check_kSingleRegMasks_fp_status_index);
+COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kCCode, 0)].Equals(
+    OneRegOneSpecial<ResourceMask::kCCode>(0)), check_kSingleRegMasks_ccode_index);
+
+// NOTE: arraysize(kNoRegMasks) multiplied by 32 due to the gcc bug workaround, see above.
+COMPILE_ASSERT(arraysize(kSingleRegMasks) == arraysize(kNoRegMasks) * 32, check_arraysizes);
+
+constexpr ResourceMask kTwoRegsMasks[] = {
+#define TWO(a, b) ResourceMask::Bit(a).Union(ResourceMask::Bit(b))
+    // NOTE: 16 * 15 / 2 = 120 entries, 16 bytes each, 1920B in total.
+    TWO(0, 1),
+    TWO(0, 2), TWO(1, 2),
+    TWO(0, 3), TWO(1, 3), TWO(2, 3),
+    TWO(0, 4), TWO(1, 4), TWO(2, 4), TWO(3, 4),
+    TWO(0, 5), TWO(1, 5), TWO(2, 5), TWO(3, 5), TWO(4, 5),
+    TWO(0, 6), TWO(1, 6), TWO(2, 6), TWO(3, 6), TWO(4, 6), TWO(5, 6),
+    TWO(0, 7), TWO(1, 7), TWO(2, 7), TWO(3, 7), TWO(4, 7), TWO(5, 7), TWO(6, 7),
+    TWO(0, 8), TWO(1, 8), TWO(2, 8), TWO(3, 8), TWO(4, 8), TWO(5, 8), TWO(6, 8), TWO(7, 8),
+    TWO(0, 9), TWO(1, 9), TWO(2, 9), TWO(3, 9), TWO(4, 9), TWO(5, 9), TWO(6, 9), TWO(7, 9),
+        TWO(8, 9),
+    TWO(0, 10), TWO(1, 10), TWO(2, 10), TWO(3, 10), TWO(4, 10), TWO(5, 10), TWO(6, 10), TWO(7, 10),
+        TWO(8, 10), TWO(9, 10),
+    TWO(0, 11), TWO(1, 11), TWO(2, 11), TWO(3, 11), TWO(4, 11), TWO(5, 11), TWO(6, 11), TWO(7, 11),
+        TWO(8, 11), TWO(9, 11), TWO(10, 11),
+    TWO(0, 12), TWO(1, 12), TWO(2, 12), TWO(3, 12), TWO(4, 12), TWO(5, 12), TWO(6, 12), TWO(7, 12),
+        TWO(8, 12), TWO(9, 12), TWO(10, 12), TWO(11, 12),
+    TWO(0, 13), TWO(1, 13), TWO(2, 13), TWO(3, 13), TWO(4, 13), TWO(5, 13), TWO(6, 13), TWO(7, 13),
+        TWO(8, 13), TWO(9, 13), TWO(10, 13), TWO(11, 13), TWO(12, 13),
+    TWO(0, 14), TWO(1, 14), TWO(2, 14), TWO(3, 14), TWO(4, 14), TWO(5, 14), TWO(6, 14), TWO(7, 14),
+        TWO(8, 14), TWO(9, 14), TWO(10, 14), TWO(11, 14), TWO(12, 14), TWO(13, 14),
+    TWO(0, 15), TWO(1, 15), TWO(2, 15), TWO(3, 15), TWO(4, 15), TWO(5, 15), TWO(6, 15), TWO(7, 15),
+        TWO(8, 15), TWO(9, 15), TWO(10, 15), TWO(11, 15), TWO(12, 15), TWO(13, 15), TWO(14, 15),
+#undef TWO
+};
+COMPILE_ASSERT(arraysize(kTwoRegsMasks) == 16 * 15 / 2, check_arraysize_kTwoRegsMasks);
+
+constexpr size_t TwoRegsIndex(size_t higher, size_t lower) {
+  return (higher * (higher - 1)) / 2u + lower;
+}
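+// E.g. TwoRegsIndex(2, 0) == 1, matching TWO(0, 2) above; callers must pass higher > lower.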
+
+constexpr bool CheckTwoRegsMask(size_t higher, size_t lower) {
+  return ResourceMask::Bit(lower).Union(ResourceMask::Bit(higher)).Equals(
+      kTwoRegsMasks[TwoRegsIndex(higher, lower)]);
+}
+
+constexpr bool CheckTwoRegsMaskLine(size_t line, size_t lower = 0u) {
+  return (lower == line) ||
+      (CheckTwoRegsMask(line, lower) && CheckTwoRegsMaskLine(line, lower + 1u));
+}
+
+constexpr bool CheckTwoRegsMaskTable(size_t lines) {
+  return lines == 0 ||
+      (CheckTwoRegsMaskLine(lines - 1) && CheckTwoRegsMaskTable(lines - 1u));
+}
+
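+// With the recursive constexpr helpers above, this single assert verifies all 120 entries.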
+COMPILE_ASSERT(CheckTwoRegsMaskTable(16), check_two_regs_masks_table);
+
+}  // anonymous namespace
+
+const ResourceMask* ResourceMaskCache::GetMask(const ResourceMask& mask) {
+  // Instead of having a deduplication map, we shall just use pre-defined constexpr
+  // masks for the common cases. At most one of these special bits is allowed:
+  constexpr ResourceMask kAllowedSpecialBits = ResourceMask::Bit(ResourceMask::kFPStatus)
+      .Union(ResourceMask::Bit(ResourceMask::kCCode))
+      .Union(kEncodeHeapRef).Union(kEncodeLiteral).Union(kEncodeDalvikReg);
+  const ResourceMask* res = nullptr;
+  // Limit to low 32 regs and the kAllowedSpecialBits.
+  if ((mask.masks_[0] >> 32) == 0u && (mask.masks_[1] & ~kAllowedSpecialBits.masks_[1]) == 0u) {
+    // Check if it's only up to two registers.
+    uint32_t low_regs = static_cast<uint32_t>(mask.masks_[0]);
+    uint32_t low_regs_without_lowest = low_regs & (low_regs - 1u);
+    if (low_regs_without_lowest == 0u && IsPowerOfTwo(mask.masks_[1])) {
+      // 0 or 1 register, 0 or 1 bit from kAllowedSpecialBits. Use a pre-defined mask.
+      size_t index = (mask.masks_[1] != 0u) ? CLZ(mask.masks_[1]) : 0u;
+      DCHECK_LT(index, arraysize(kNoRegMasks));
+      res = (low_regs != 0) ? &kSingleRegMasks[SingleRegMaskIndex(index, CTZ(low_regs))]
+                            : &kNoRegMasks[index];
+    } else if (IsPowerOfTwo(low_regs_without_lowest) && mask.masks_[1] == 0u) {
+      // 2 registers and no other flags. Use predefined mask if higher reg is < 16.
+      if (low_regs_without_lowest < (1u << 16)) {
+        res = &kTwoRegsMasks[TwoRegsIndex(CTZ(low_regs_without_lowest), CTZ(low_regs))];
+      }
+    }
+  } else if (mask.Equals(kEncodeAll)) {
+    res = &kEncodeAll;
+  }
+  if (res != nullptr) {
+    DCHECK(res->Equals(mask))
+        << "(" << std::hex << std::setw(16) << mask.masks_[0]
+        << ", "<< std::hex << std::setw(16) << mask.masks_[1]
+        << ") != (" << std::hex << std::setw(16) << res->masks_[0]
+        << ", "<< std::hex << std::setw(16) << res->masks_[1] << ")";
+    return res;
+  }
+
+  // TODO: Deduplicate. (At least the most common masks.)
+  void* mem = allocator_->Alloc(sizeof(ResourceMask), kArenaAllocLIRResourceMask);
+  return new (mem) ResourceMask(mask);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h
new file mode 100644
index 0000000..12ce98a
--- /dev/null
+++ b/compiler/dex/quick/resource_mask.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
+#define ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "dex/reg_storage.h"
+
+namespace art {
+
+class ArenaAllocator;
+
+/**
+ * @brief Resource mask for LIR insn uses or defs.
+ * @details Def/Use mask used for checking dependencies between LIR insns in local
+ * optimizations such as load hoisting.
+ */
+class ResourceMask {
+ private:
+  constexpr ResourceMask(uint64_t mask1, uint64_t mask2)
+      : masks_{ mask1, mask2 } {  // NOLINT
+  }
+
+ public:
+  /*
+   * Def/Use encoding in 128-bit use_mask/def_mask.  Low positions used for target-specific
+   * registers (and typically use the register number as the position).  High positions
+   * reserved for common and abstract resources.
+   */
+  enum ResourceBit {
+    kMustNotAlias = 127,
+    kHeapRef = 126,         // Default memory reference type.
+    kLiteral = 125,         // Literal pool memory reference.
+    kDalvikReg = 124,       // Dalvik v_reg memory reference.
+    kFPStatus = 123,
+    kCCode = 122,
+    kLowestCommonResource = kCCode,
+    kHighestCommonResource = kMustNotAlias
+  };
+
+  // Default-constructible.
+  constexpr ResourceMask()
+    : masks_ { 0u, 0u } {
+  }
+
+  // Copy-constructible and copyable.
+  ResourceMask(const ResourceMask& other) = default;
+  ResourceMask& operator=(const ResourceMask& other) = default;
+
+  static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) {
+    return ResourceMask(mask1, mask2);
+  }
+
+  static constexpr ResourceMask Bit(size_t bit) {
+    return ResourceMask(bit >= 64u ? 0u : UINT64_C(1) << bit,
+                        bit >= 64u ? UINT64_C(1) << (bit - 64u) : 0u);
+  }
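+  // E.g. Bit(5) sets bit 5 of masks_[0]; Bit(kHeapRef) == Bit(126) sets bit 62 of masks_[1].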
+
+  // Two consecutive bits. The start_bit must be even.
+  static constexpr ResourceMask TwoBits(size_t start_bit) {
+    return
+        DCHECK_CONSTEXPR((start_bit & 1u) == 0u, << start_bit << " isn't even", Bit(0))
+        ResourceMask(start_bit >= 64u ? 0u : UINT64_C(3) << start_bit,
+                     start_bit >= 64u ? UINT64_C(3) << (start_bit - 64u) : 0u);
+  }
+
+  static constexpr ResourceMask NoBits() {
+    return ResourceMask(UINT64_C(0), UINT64_C(0));
+  }
+
+  static constexpr ResourceMask AllBits() {
+    return ResourceMask(~UINT64_C(0), ~UINT64_C(0));
+  }
+
+  constexpr ResourceMask Union(const ResourceMask& other) const {
+    return ResourceMask(masks_[0] | other.masks_[0], masks_[1] | other.masks_[1]);
+  }
+
+  constexpr ResourceMask Intersection(const ResourceMask& other) const {
+    return ResourceMask(masks_[0] & other.masks_[0], masks_[1] & other.masks_[1]);
+  }
+
+  constexpr ResourceMask Without(const ResourceMask& other) const {
+    return ResourceMask(masks_[0] & ~other.masks_[0], masks_[1] & ~other.masks_[1]);
+  }
+
+  constexpr bool Equals(const ResourceMask& other) const {
+    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
+  }
+
+  constexpr bool Intersects(const ResourceMask& other) const {
+    return (masks_[0] & other.masks_[0]) != 0u || (masks_[1] & other.masks_[1]) != 0u;
+  }
+
+  void SetBit(size_t bit) {
+    DCHECK_LE(bit, kHighestCommonResource);
+    masks_[bit / 64u] |= UINT64_C(1) << (bit & 63u);
+  }
+
+  constexpr bool HasBit(size_t bit) const {
+    return (masks_[bit / 64u] & (UINT64_C(1) << (bit & 63u))) != 0u;
+  }
+
+  ResourceMask& SetBits(const ResourceMask& other) {
+    masks_[0] |= other.masks_[0];
+    masks_[1] |= other.masks_[1];
+    return *this;
+  }
+
+  ResourceMask& ClearBits(const ResourceMask& other) {
+    masks_[0] &= ~other.masks_[0];
+    masks_[1] &= ~other.masks_[1];
+    return *this;
+  }
+
+ private:
+  uint64_t masks_[2];
+
+  friend class ResourceMaskCache;
+};
+
+constexpr ResourceMask kEncodeNone = ResourceMask::NoBits();
+constexpr ResourceMask kEncodeAll = ResourceMask::AllBits();
+constexpr ResourceMask kEncodeHeapRef = ResourceMask::Bit(ResourceMask::kHeapRef);
+constexpr ResourceMask kEncodeLiteral = ResourceMask::Bit(ResourceMask::kLiteral);
+constexpr ResourceMask kEncodeDalvikReg = ResourceMask::Bit(ResourceMask::kDalvikReg);
+constexpr ResourceMask kEncodeMem = kEncodeLiteral.Union(kEncodeDalvikReg).Union(
+    kEncodeHeapRef).Union(ResourceMask::Bit(ResourceMask::kMustNotAlias));
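+// kEncodeMem unions all abstract memory resources; local optimizations split masks against
+// it, e.g. def_mask->Without(kEncodeMem) keeps only the register dependencies.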
+
+class ResourceMaskCache {
+ public:
+  explicit ResourceMaskCache(ArenaAllocator* allocator)
+      : allocator_(allocator) {
+  }
+
+  const ResourceMask* GetMask(const ResourceMask& mask);
+
+ private:
+  ArenaAllocator* allocator_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 39a0365..c7e289d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -23,9 +23,9 @@
 #define MAX_ASSEMBLER_RETRIES 50
 
 const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
-  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data",  "0x!0d" },
-  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" },
-  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
+  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4, false }, "data",  "0x!0d" },
+  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0, false }, "int 3", "" },
+  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0, false }, "nop",   "" },
 
 #define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \
                      rm8_r8, rm32_r32, \
@@ -34,65 +34,65 @@
                      rm8_i8, rm8_i8_modrm, \
                      rm32_i32, rm32_i32_modrm, \
                      rm32_i8, rm32_i8_modrm) \
-{ kX86 ## opname ## 8MR, kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 8AR, kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0 }, #opname "8TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0 }, #opname "8RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1 }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1 }, #opname "8TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 8MR, kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 8AR, kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1, true }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 8AI, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, true }, #opname "8TI", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 16MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0 }, #opname "16MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 16AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0 }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 16TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0 }, #opname "16TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0 }, #opname "16RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2 }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2 }, #opname "16TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 16MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 16AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 16TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2, false }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16TI8", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32TI8", "fs:[!0d],!1d" }, \
   \
-{ kX86 ## opname ## 64MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 64AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 64TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 64RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 64RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 64RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "64TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 64RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64TI8", "fs:[!0d],!1d" }
+{ kX86 ## opname ## 64MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 64AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 64TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64TR", "fs:[!0d],!1r" }, \
+{ kX86 ## opname ## 64RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RR", "!0r,!1r" }, \
+{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 64RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 64RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "64RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64TI", "fs:[!0d],!1d" }, \
+{ kX86 ## opname ## 64RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64MI8", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64TI8", "fs:[!0d],!1d" }
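+// Each ENCODING_MAP expansion stamps out the full set of 8/16/32/64-bit
+// addressing variants for one ALU opcode; note that only the 8-bit rows set
+// the new trailing bool.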
 
 ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0,
   0x00 /* RegMem8/Reg8 */,     0x01 /* RegMem32/Reg32 */,
@@ -144,114 +144,112 @@
   0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */),
 #undef ENCODING_MAP
 
-  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RRI", "!0r,!1r,!2d" },
-  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RRI", "!0r,!1r,!2d" },
+  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
 
-  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RRI", "!0r,!1r,!2d" },
-  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RRI8", "!0r,!1r,!2d" },
-  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RRI", "!0r,!1r,!2d" },
+  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RRI8", "!0r,!1r,!2d" },
+  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
 
-  { kX86Imul64RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RRI", "!0r,!1r,!2d" },
-  { kX86Imul64RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul64RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RRI8", "!0r,!1r,!2d" },
-  { kX86Imul64RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul64RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RRI", "!0r,!1r,!2d" },
+  { kX86Imul64RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RMI", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul64RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
+  { kX86Imul64RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RRI8", "!0r,!1r,!2d" },
+  { kX86Imul64RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" },
+  { kX86Imul64RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
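+  // The Imul64 immediate widths above shrink from 8 to 4 bytes: x86-64 has no
+  // imm64 form of imul, so opcode 0x69 takes a sign-extended imm32 even under
+  // REX.W.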
 
-  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" },
-  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" },
-  { kX86Mov8RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RR", "!0r,!1r" },
-  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RM", "!0r,[!1r+!2d]" },
-  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RT", "!0r,fs:[!1d]" },
-  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB0, 0, 0, 0, 0, 1 }, "Mov8RI", "!0r,!1d" },
-  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8MI", "[!0r+!1d],!2d" },
-  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8TI", "fs:[!0d],!1d" },
+  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8MR", "[!0r+!1d],!2r" },
+  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8TR", "fs:[!0d],!1r" },
+  { kX86Mov8RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RR", "!0r,!1r" },
+  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RM", "!0r,[!1r+!2d]" },
+  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RT", "!0r,fs:[!1d]" },
+  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB0, 0, 0, 0, 0, 1, true }, "Mov8RI", "!0r,!1d" },
+  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8MI", "[!0r+!1d],!2d" },
+  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8TI", "fs:[!0d],!1d" },
 
-  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0x66,          0,    0x89, 0, 0, 0, 0, 0 }, "Mov16MR", "[!0r+!1d],!2r" },
-  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0x66,          0,    0x89, 0, 0, 0, 0, 0 }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0 }, "Mov16TR", "fs:[!0d],!1r" },
-  { kX86Mov16RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RR", "!0r,!1r" },
-  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RM", "!0r,[!1r+!2d]" },
-  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0x66,          0,    0x8B, 0, 0, 0, 0, 0 }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RT", "!0r,fs:[!1d]" },
-  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0x66,          0,    0xB8, 0, 0, 0, 0, 2 }, "Mov16RI", "!0r,!1d" },
-  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0x66,          0,    0xC7, 0, 0, 0, 0, 2 }, "Mov16MI", "[!0r+!1d],!2d" },
-  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0x66,          0,    0xC7, 0, 0, 0, 0, 2 }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
+  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16MR", "[!0r+!1d],!2r" },
+  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0, false }, "Mov16TR", "fs:[!0d],!1r" },
+  { kX86Mov16RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RR", "!0r,!1r" },
+  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RM", "!0r,[!1r+!2d]" },
+  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RT", "!0r,fs:[!1d]" },
+  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0x66,          0,    0xB8, 0, 0, 0, 0, 2, false }, "Mov16RI", "!0r,!1d" },
+  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16MI", "[!0r+!1d],!2d" },
+  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16TI", "fs:[!0d],!1d" },
 
-  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
-  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
-  { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
-  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
-  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
-  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
-  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" },
-  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" },
+  { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" },
+  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4, false }, "Mov32RI", "!0r,!1d" },
+  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32MI", "[!0r+!1d],!2d" },
+  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32TI", "fs:[!0d],!1d" },
 
-  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
-  { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { 0,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" },
+  { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" },
+  { kX86Mov64RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" },
+  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" },
+  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" },
+  { kX86Mov64RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI", "!0r,!1d" },
+  { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64MI", "[!0r+!1d],!2d" },
+  { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" },
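+  // Mov64RI keeps an 8-byte immediate (the movabs 0xB8+r form), while the
+  // memory destinations drop to 4 bytes: 0xC7 under REX.W only accepts a
+  // sign-extended imm32.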
 
-  { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" },
-  { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0 }, "Mov64TR", "fs:[!0d],!1r" },
-  { kX86Mov64RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RR", "!0r,!1r" },
-  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" },
-  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" },
-  { kX86Mov64RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8 }, "Mov64RI", "!0r,!1d" },
-  { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64MI", "[!0r+!1d],!2d" },
-  { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 8 }, "Mov64TI", "fs:[!0d],!1d" },
+  { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" },
+  { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
-  { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { REX_W,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RM", "!0r,[!1r+!2d]" },
+  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RR", "!2c !0r,!1r" },
+  { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RR", "!2c !0r,!1r" },
 
-  { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { REX_W,             0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-
-  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0,     0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
-  { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RR", "!2c !0r,!1r" },
-
-  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0,     0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
-  { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" },
+  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
+  { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" },
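+  // cmovcc encodes as 0x0F, 0x40 plus the condition; the "!2c"/"!3c" format
+  // directives print the condition mnemonic passed as the trailing operand.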
 
 #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
-{ kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8RC", "!0r,cl" }, \
-{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1 }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 8RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8RC", "!0r,cl" }, \
+{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 16RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16RC", "!0r,cl" }, \
-{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1 }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 16RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 16RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16RC", "!0r,cl" }, \
+{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 32RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32RC", "!0r,cl" }, \
-{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \
+{ kX86 ## opname ## 32RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 32RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32RC", "!0r,cl" }, \
+{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \
   \
-{ kX86 ## opname ## 64RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64RC", "!0r,cl" }, \
-{ kX86 ## opname ## 64MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 64AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0 }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" }
+{ kX86 ## opname ## 64RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
+{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
+{ kX86 ## opname ## 64RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64RC", "!0r,cl" }, \
+{ kX86 ## opname ## 64MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64MC", "[!0r+!1d],cl" }, \
+{ kX86 ## opname ## 64AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" }
 
   SHIFT_ENCODING_MAP(Rol, 0x0),
   SHIFT_ENCODING_MAP(Ror, 0x1),
@@ -262,31 +260,31 @@
   SHIFT_ENCODING_MAP(Sar, 0x7),
 #undef SHIFT_ENCODING_MAP
 
-  { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
-  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32RRI", "!0r,!1r,!2d" },
-  { kX86Shld32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" },
-  { kX86Shrd32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shld64RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64RRI", "!0r,!1r,!2d" },
-  { kX86Shld64MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd64RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64RRI", "!0r,!1r,!2d" },
-  { kX86Shrd64MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0, false }, "Cmc", "" },
+  { kX86Shld32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32RRI", "!0r,!1r,!2d" },
+  { kX86Shld32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32RRI", "!0r,!1r,!2d" },
+  { kX86Shrd32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shld64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64RRI", "!0r,!1r,!2d" },
+  { kX86Shld64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
+  { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64RI", "!0r,!1d" },
-  { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64MI", "[!0r+!1d],!2d" },
-  { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
+  { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
+  { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" },
-  { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0}, "Test64RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
@@ -294,18 +292,18 @@
                            arr, arr_kind, arr_flags, imm, \
                            b_flags, hw_flags, w_flags, \
                            b_format, hw_format, w_format) \
-{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, b_format "!0r" }, \
-{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, b_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, hw_format "!0r" }, \
-{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 64 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
+{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #reg, b_format "!0r" }, \
+{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #mem, b_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #reg, hw_format "!0r" }, \
+{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 64 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
 
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
@@ -316,33 +314,43 @@
   UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
-  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
-  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
-  { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
-  { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
+  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0,     0, 0x99, 0,    0, 0, 0, 0, false }, "Cdq", "" },
+  { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0,    0, 0, 0, 0, false }, "Cqo", "" },
+  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0,     0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap32R", "!0r" },
+  { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0,     0, 0x50, 0,    0, 0, 0, 0, false }, "Push32R",  "!0r" },
+  { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0,     0, 0x58, 0,    0, 0, 0, 0, false }, "Pop32R",   "!0r" },
 
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
+#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
 #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
   EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
-  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
-  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdMR", "[!0r+!1d],!2r" },
+  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10, REG_DEF0),
-  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssMR", "[!0r+!1d],!2r" },
-  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssMR", "[!0r+!1d],!2r" },
+  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Cvtsi2sd,  0xF2, 0x2A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsi2ss,  0xF3, 0x2A, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd,  0xF2, 0x2A, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss,  0xF3, 0x2A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES|REG_USE0),
@@ -383,74 +391,84 @@
   EXT_0F_ENCODING2_MAP(Phaddw,   0x66, 0x38, 0x01, REG_DEF0_USE0),
   EXT_0F_ENCODING2_MAP(Phaddd,   0x66, 0x38, 0x02, REG_DEF0_USE0),
 
-  { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1 }, "PextbRRI", "!0r,!1r,!2d" },
-  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1 }, "PextwRRI", "!0r,!1r,!2d" },
-  { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1 }, "PextdRRI", "!0r,!1r,!2d" },
+  { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
+  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
+  { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
 
-  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuflwRRI", "!0r,!1r,!2d" },
-  { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuffRRI", "!0r,!1r,!2d" },
+  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
+  { kX86PshufdRRI,  kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshufdRRI", "!0r,!1r,!2d" },
 
-  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1 }, "PsrawRI", "!0r,!1d" },
-  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1 }, "PsradRI", "!0r,!1d" },
-  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1 }, "PsrlwRI", "!0r,!1d" },
-  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1 }, "PsrldRI", "!0r,!1d" },
-  { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
-  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1 }, "PsllwRI", "!0r,!1d" },
-  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1 }, "PslldRI", "!0r,!1d" },
-  { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
+  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
+  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
+  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
+  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1, false }, "PsrldRI", "!0r,!1d" },
+  { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1, false }, "PsrlqRI", "!0r,!1d" },
+  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1, false }, "PsllwRI", "!0r,!1d" },
+  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
+  { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
 
-  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" },
-  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" },
-  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
-  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
+  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" },
+  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" },
+  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" },
+  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" },
 
   EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
-  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" },
-  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
+  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
 
 
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
-  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
-  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsMR", "[!0r+!1d],!2r" },
+  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
-  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
-  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsMR", "[!0r+!1d],!2r" },
+  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
-  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsMR", "[!0r+!1d],!2r" },
+  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
-  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsMR", "[!0r+!1d],!2r" },
+  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
-  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
-  { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
-  { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
+  EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0),
+  { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxRR", "!0r,!1r" },
+  { kX86MovqrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxMR", "[!0r+!1d],!2r" },
+  { kX86MovqrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
-  { kX86Set8R, kRegCond,              IS_BINARY_OP   | REG_DEF0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" },
-  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
-  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
+  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxRR", "!0r,!1r" },
+  { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxMR", "[!0r+!1d],!2r" },
+  { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovsxdRR, kRegReg,      IS_BINARY_OP | REG_DEF0 | REG_USE1,              { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRR", "!0r,!1r" },
+  { kX86MovsxdRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1,  { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRM", "!0r,[!1r+!2d]" },
+  { kX86MovsxdRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+
+  { kX86Set8R, kRegCond,              IS_BINARY_OP   | REG_DEF0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true }, "Set8R", "!1c !0r" },
+  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8M", "!2c [!0r+!1d]" },
+  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
 
   // TODO: load/store?
   // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
-  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
+  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
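+  // Illustrative aside (not part of this change): mfence assembles to 0F AE F0, where 0xF0 is
+  // the modrm byte with mod=11 and reg=6 - exactly the 6 pre-stored in the modrm_opcode slot.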
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul64,  REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
 
-  { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
-  { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchg8bM, kMem,   IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" },
-  { kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
-  { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" },
+  { kX86CmpxchgRR, kRegRegStore,  IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES,   { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "!0r,!1r" },
+  { kX86CmpxchgMR, kMemReg,       IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86CmpxchgAR, kArrayReg,     IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchg64M, kMem,     IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1d]" },
+  { kX86LockCmpxchg64A, kArray,   IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES,  { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
+  { kX86XchgMR, kMemReg,          IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02,          { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
 
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
@@ -458,28 +476,110 @@
   EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
 #undef EXT_0F_ENCODING_MAP
 
-  { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0 }, "Jcc8",  "!1c !0t" },
-  { kX86Jcc32, kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" },
-  { kX86Jmp8,  kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xEB, 0,    0, 0, 0, 0 }, "Jmp8",  "!0t" },
-  { kX86Jmp32, kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xE9, 0,    0, 0, 0, 0 }, "Jmp32", "!0t" },
-  { kX86JmpR,  kJmp,  IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xFF, 0,    0, 4, 0, 0 }, "JmpR",  "!0r" },
-  { kX86Jecxz8, kJmp, NO_OPERAND   | IS_BRANCH | NEEDS_FIXUP | REG_USEC,    { 0,             0, 0xE3, 0,    0, 0, 0, 0 }, "Jecxz", "!0t" },
-  { kX86JmpT,  kJmp,  IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 4, 0, 0 }, "JmpT",  "fs:[!0d]" },
-  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xE8, 0,    0, 0, 0, 0 }, "CallR", "!0r" },
-  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
-  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
-  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0 }, "CallT", "fs:[!0d]" },
-  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4 }, "CallI", "!0d" },
-  { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0 }, "Ret", "" },
+  { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0, false }, "Jcc8",  "!1c !0t" },
+  { kX86Jcc32, kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x0F, 0x80, 0, 0, 0, 0, false }, "Jcc32", "!1c !0t" },
+  { kX86Jmp8,  kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xEB, 0,    0, 0, 0, 0, false }, "Jmp8",  "!0t" },
+  { kX86Jmp32, kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xE9, 0,    0, 0, 0, 0, false }, "Jmp32", "!0t" },
+  { kX86JmpR,  kJmp,  IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpR",  "!0r" },
+  { kX86Jecxz8, kJmp, NO_OPERAND   | IS_BRANCH | NEEDS_FIXUP | REG_USEC,    { 0,             0, 0xE3, 0,    0, 0, 0, 0, false }, "Jecxz", "!0t" },
+  { kX86JmpT,  kJmp,  IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpT",  "fs:[!0d]" },
+  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xE8, 0,    0, 0, 0, 0, false }, "CallR", "!0r" },
+  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallM", "[!0r+!1d]" },
+  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallA", "[!0r+!1r<<!2d+!3d]" },
+  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0, false }, "CallT", "fs:[!0d]" },
+  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4, false }, "CallI", "!0d" },
+  { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0, false }, "Ret", "" },
 
-  { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
-  { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
-  { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr",      "!0r,!1d" },
-  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
+  { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" },
+  { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
+  { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr",      "!0r,!1d" },
+  { kX86RepneScasw,    kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" },
 };
 
-size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement,
-                               int reg_r, int reg_x, bool has_sib) {
+static bool NeedsRex(int32_t raw_reg) {
+  return RegStorage::RegNum(raw_reg) > 7;
+}
+
+static uint8_t LowRegisterBits(int32_t raw_reg) {
+  uint8_t low_reg = RegStorage::RegNum(raw_reg) & kRegNumMask32;  // 3 bits
+  DCHECK_LT(low_reg, 8);
+  return low_reg;
+}
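+// Illustrative example (not part of this change): r9 has register number 9, so NeedsRex()
+// returns true and a REX R/X/B bit supplies the high bit, while LowRegisterBits() returns
+// 9 & 7 == 1, the 3-bit value that lands in the modrm or sib field.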
+
+static bool HasModrm(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    case kNullary: return false;
+    case kRegOpcode: return false;
+    default: return true;
+  }
+}
+
+static bool HasSib(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    case kArray: return true;
+    case kArrayReg: return true;
+    case kRegArray: return true;
+    case kArrayImm: return true;
+    case kRegArrayImm: return true;
+    case kShiftArrayImm: return true;
+    case kShiftArrayCl: return true;
+    case kArrayCond: return true;
+    case kCall:
+      switch (entry->opcode) {
+        case kX86CallA: return true;
+        default: return false;
+      }
+    case kPcRel:
+      switch (entry->opcode) {
+        case kX86PcRelLoadRA: return true;
+        default: return false;
+      }
+    default: return false;
+  }
+}
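+// Illustrative note (not part of this change): a SIB byte is what encodes the scaled-index
+// form [base + index<<scale + disp], i.e. all the *Array* LIR kinds above plus the computed
+// address flavors of kCall and kPcRel.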
+
+static bool ModrmIsRegReg(const X86EncodingMap* entry) {
+  switch (entry->kind) {
+    // These kinds of instruction have no modrm byte, so no register forms part of a modrm:
+    case kNullary: return true;
+    case kRegOpcode: return true;
+    case kMovRegImm: return true;
+    // Regular modrm-value-of-3 (register-direct) cases; when there is only one register, the
+    // modrm reg field holds an opcode extension, so that register is encoded as the base (rm):
+    case kReg: return true;
+    case kRegReg: return true;
+    case kRegRegStore: return true;
+    case kRegImm: return true;
+    case kRegRegImm: return true;
+    case kRegRegImmStore: return true;
+    case kShiftRegImm: return true;
+    case kShiftRegCl: return true;
+    case kRegCond: return true;
+    case kRegRegCond: return true;
+    case kJmp:
+      switch (entry->opcode) {
+        case kX86JmpR: return true;
+        default: return false;
+      }
+    case kCall:
+      switch (entry->opcode) {
+        case kX86CallR: return true;
+        default: return false;
+      }
+    default: return false;
+  }
+}
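+// Illustrative example (not part of this change): these kinds emit a modrm with mod == 3
+// (register-direct), e.g. "add eax, ecx" is 01 C8 with mod=11, reg=ecx, rm=eax, so no SIB
+// byte or displacement can follow - which is what ComputeSize() relies on below.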
+
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
+                               int32_t raw_base, int32_t displacement) {
+  bool has_modrm = HasModrm(entry);
+  bool has_sib = HasSib(entry);
+  bool r8_form = entry->skeleton.r8_form;
+  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
+  if (has_sib) {
+    DCHECK(!modrm_is_reg_reg);
+  }
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -487,9 +587,21 @@
       ++size;
     }
   }
-  if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) &&
-       entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
-    ++size;  // REX_R
+  if (Gen64Bit() || kIsDebugBuild) {
+    bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
+    if (r8_form) {
+      // Do we need an empty REX prefix to normalize byte registers?
+      registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4);
+      registers_need_rex_prefix = registers_need_rex_prefix ||
+          (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
+    }
+    if (registers_need_rex_prefix) {
+      DCHECK(Gen64Bit()) << "Attempt to use a 64-bit only addressable register "
+          << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name;
+      if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
+        ++size;  // rex
+      }
+    }
   }
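+  // (Illustrative note, not part of this change: the kIsDebugBuild arm above makes 32-bit
+  // builds still run this block so the DCHECK can flag any 64-bit-only register.)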
   ++size;  // opcode
   if (entry->skeleton.opcode == 0x0F) {
@@ -498,133 +610,136 @@
       ++size;
     }
   }
-  ++size;  // modrm
-  if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum()
-      || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
-    // SP requires a SIB byte.
-    // GS access also needs a SIB byte for absolute adressing in 64-bit mode.
-    ++size;
+  if (has_modrm) {
+    ++size;  // modrm
   }
-  if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) {
-    // BP requires an explicit displacement, even when it's 0.
-    if (entry->opcode != kX86Lea32RA) {
-      DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name;
+  if (!modrm_is_reg_reg) {
+    if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum()
+        || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
+      // SP requires a SIB byte.
+      // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
+      ++size;
     }
-    size += IS_SIMM8(displacement) ? 1 : 4;
+    if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
+      // BP requires an explicit displacement, even when it's 0.
+      if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
+        DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
+      }
+      size += IS_SIMM8(displacement) ? 1 : 4;
+    }
   }
   size += entry->skeleton.immediate_bytes;
   return size;
 }
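+// Worked example (illustrative, not part of this change): a kMemReg store such as
+// "mov [ebp + 0x100], eax" sizes as 1 (opcode 0x89) + 1 (modrm) + 4 (disp32, since 0x100 is
+// not a simm8) = 6 bytes; substituting r8d for eax adds one more byte for the REX prefix.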
 
-int X86Mir2Lir::GetInsnSize(LIR* lir) {
+size_t X86Mir2Lir::GetInsnSize(LIR* lir) {
   DCHECK(!IsPseudoLirOp(lir->opcode));
   const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode];
+  DCHECK_EQ(entry->opcode, lir->opcode) << entry->name;
+
   switch (entry->kind) {
     case kData:
-      return 4;  // 4 bytes of data
+      return 4;  // 4 bytes of data.
     case kNop:
-      return lir->operands[0];  // length of nop is sole operand
+      return lir->operands[0];  // Length of nop is sole operand.
     case kNullary:
-      return 1;  // 1 byte of opcode
-    case kPrefix2Nullary:
-      return 3;  // 1 byte of opcode + 2 prefixes
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0);
     case kRegOpcode:  // lir operands - 0: reg
-      // substract 1 for modrm
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1;
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kReg:  // lir operands - 0: reg
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kMem:  // lir operands - 0: base, 1: disp
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         lir->operands[2], NO_REG, false);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
     case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         lir->operands[2], NO_REG, false);
+      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         lir->operands[4], lir->operands[1], true);
+      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
+                         lir->operands[3]);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
-      return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false);
+      // Thread displacement size is always 32bit.
+      return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, 0x12345678);
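+      // (0x12345678 is simply a placeholder guaranteed not to fit in a simm8, forcing the
+      // 4-byte displacement path in ComputeSize().)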
     case kRegReg:  // lir operands - 0: reg1, 1: reg2
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
     case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
-      return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false);
+      return ComputeSize(entry, lir->operands[1], NO_REG, lir->operands[0], 0);
     case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-      return ComputeSize(entry, lir->operands[1], lir->operands[4],
-                         lir->operands[0], lir->operands[2], true);
+      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                         lir->operands[4]);
     case kRegThread:  // lir operands - 0: reg, 1: disp
-      // displacement size is always 32bit
-      return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false);
+      // Thread displacement size is always 32bit.
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0x12345678);
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
-      size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
+      // AX opcodes don't require the modrm byte.
       if (entry->skeleton.ax_opcode == 0) {
         return size;
       } else {
-        // AX opcodes don't require the modrm byte.
-        int reg = lir->operands[0];
-        return size - (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() ? 1 : 0);
+        return size - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
       }
     }
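+    // Illustrative aside (not part of this change): the ax_opcode short forms are the classic
+    // accumulator encodings, e.g. 32-bit ADD is 81 /0 id in general (opcode + modrm + imm32)
+    // but "add eax, imm32" is just 05 id, one byte shorter.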
     case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[1],
-                         NO_REG, lir->operands[0], false);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
     case kThreadImm:  // lir operands - 0: disp, 1: imm
-      // displacement size is always 32bit
-      return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
-    case kRegRegImm:  // lir operands - 0: reg, 1: reg, 2: imm
-    case kRegRegImmRev:
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
+      // Thread displacement size is always 32bit.
+      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
+    case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
+      // Note: RegRegImm form passes reg2 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, 0);
+    case kRegRegImmStore:  // lir operands - 0: reg2, 1: reg1, 2: imm
+      // Note: RegRegImmStore form passes reg1 as index but encodes it using base.
+      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, 0);
     case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kRegArrayImm:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
-      return ComputeSize(entry, lir->operands[1], lir->operands[4],
-                         lir->operands[0], lir->operands[2], true);
+      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                         lir->operands[4]);
     case kMovRegImm:  // lir operands - 0: reg, 1: immediate
-      return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) +
-             1 + entry->skeleton.immediate_bytes;
+      return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 +
+          entry->skeleton.immediate_bytes;
     case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) -
-             (lir->operands[1] == 1 ? 1 : 0);
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0) -
+          (lir->operands[1] == 1 ? 1 : 0);
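+      // Illustrative aside (not part of this change): shifts by an immediate use C1 /r ib,
+      // but a shift by exactly 1 can use the D1 /r form with no immediate byte - hence the
+      // -1 here and in the two cases below.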
     case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) -
-             (lir->operands[2] == 1 ? 1 : 0);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]) -
+          (lir->operands[2] == 1 ? 1 : 0);
     case kShiftArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
       // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true) -
-             (lir->operands[4] == 1 ? 1 : 0);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]) -
+          (lir->operands[4] == 1 ? 1 : 0);
     case kShiftRegCl:  // lir operands - 0: reg, 1: cl
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1]));
+      // Note: ShiftRegCl form passes reg as reg but encodes it using base.
+      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
     case kShiftMemCl:  // lir operands - 0: base, 1: disp, 2: cl
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
-    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         lir->operands[4], lir->operands[1], true);
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
+    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl
+      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4]));
+      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
+                         lir->operands[3]);
     case kRegCond:  // lir operands - 0: reg, 1: cond
-      return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
     case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
     case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
-      return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                         NO_REG, lir->operands[1], true);
-    case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: cond
-      return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
-    case kRegMemCond:  // lir operands - 0: reg, 1: reg, 2: disp, 3:cond
-      return ComputeSize(entry, lir->operands[1], lir->operands[2],
-                         lir->operands[0], lir->operands[1], false);
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
+    case kRegRegCond:  // lir operands - 0: reg1, 1: reg2, 2: cond
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
+    case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3:cond
+      DCHECK_EQ(false, entry->skeleton.r8_form);
+      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -638,8 +753,8 @@
       } else if (lir->opcode == kX86Jmp32) {
         return 5;  // opcode + rel32
       } else if (lir->opcode == kX86JmpT) {
-        // displacement size is always 32bit
-        return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
+        // Thread displacement size is always 32bit.
+        return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
       } else {
         DCHECK(lir->opcode == kX86JmpR);
         if (NeedsRex(lir->operands[0])) {
@@ -653,13 +768,12 @@
         case kX86CallI: return 5;  // opcode 0:disp
         case kX86CallR: return 2;  // opcode modrm
         case kX86CallM:  // lir operands - 0: base, 1: disp
-          return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
+          return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
         case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-          return ComputeSize(entry, lir->operands[0], lir->operands[3],
-                             NO_REG, lir->operands[1], true);
+          return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
         case kX86CallT:  // lir operands - 0: disp
-          // displacement size is always 32bit
-          return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
+          // Thread displacement size is always 32bit.
+          return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
         default:
           break;
       }
@@ -667,43 +781,83 @@
     case kPcRel:
       if (entry->opcode == kX86PcRelLoadRA) {
         // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
-        return ComputeSize(entry, lir->operands[1], 0x12345678,
-                           lir->operands[0], lir->operands[2], true);
+        // Force the displacement size to 32bit, it will hold a computed offset later.
+        return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
+                           0x12345678);
       } else {
-        DCHECK(entry->opcode == kX86PcRelAdr);
+        DCHECK_EQ(entry->opcode, kX86PcRelAdr);
         return 5;  // opcode with reg + 4 byte immediate
       }
     case kMacro:  // lir operands - 0: reg
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
-          ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0,
-                      lir->operands[0], NO_REG, false) -
-          // shorter ax encoding
-          (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
-    default:
+          ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+                      lir->operands[0], NO_REG, NO_REG, 0) -
+              // Shorter ax encoding.
+              (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
+    case kUnimplemented:
       break;
   }
   UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name;
   return 0;
 }
 
-void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
-  EmitPrefix(entry, NO_REG, NO_REG, NO_REG);
+static uint8_t ModrmForDisp(int base, int disp) {
+  // BP requires an explicit disp, so do not omit it in the 0 case
+  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
+    return 0;
+  } else if (IS_SIMM8(disp)) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
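ModrmForDisp chooses the ModR/M mod field from the displacement width: 0 means no displacement bytes, 1 a signed 8-bit displacement, 2 a full 32-bit one. The rBP special case exists because mod=0 with rm=101 is reserved (it means disp32 with no base), so a zero offset from BP must still be emitted as an explicit disp8. A minimal standalone sketch of the same decision table (kBpRegNum and IsSimm8 are stand-ins for the helpers used above):

    #include <cassert>
    #include <cstdint>

    constexpr int kBpRegNum = 5;  // rBP/EBP encodes as register number 5.
    constexpr bool IsSimm8(int32_t v) { return v >= -128 && v <= 127; }

    // Same decision table as ModrmForDisp: 0 = no disp, 1 = disp8, 2 = disp32.
    uint8_t ModForDisp(int base_reg_num, int32_t disp) {
      if (disp == 0 && base_reg_num != kBpRegNum) {
        return 0;
      }
      return IsSimm8(disp) ? 1 : 2;
    }

    int main() {
      assert(ModForDisp(0, 0) == 0);    // [eax] needs no displacement bytes.
      assert(ModForDisp(5, 0) == 1);    // [ebp] must carry an explicit disp8 of 0.
      assert(ModForDisp(0, 127) == 1);  // fits in a signed byte.
      assert(ModForDisp(0, 128) == 2);  // needs a full disp32.
      return 0;
    }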
+void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg) {
+  if (kIsDebugBuild) {
+    // Sanity check r8_form is correctly specified.
+    if (entry->skeleton.r8_form) {
+      CHECK(strchr(entry->name, '8') != nullptr) << entry->name;
+    } else {
+      if (entry->skeleton.immediate_bytes != 1) {  // Ignore ...I8 instructions.
+        if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")) {
+          CHECK(strchr(entry->name, '8') == nullptr) << entry->name;
+        }
+      }
+    }
+    if (RegStorage::RegNum(raw_reg) >= 4) {
+      // Without a REX prefix, byte-form register numbers 4-7 encode ah, bh, ch and dh
+      // rather than the intended low bytes, so they are rejected in 32-bit.
+      CHECK(Gen64Bit() || !entry->skeleton.r8_form)
+               << "Invalid register " << static_cast<int>(RegStorage::RegNum(raw_reg))
+               << " for instruction " << entry->name << " in "
+               << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+    }
+  }
 }
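The register-number check above guards a quirk of byte operands: without a REX prefix, ModR/M numbers 4-7 select the high-byte registers ah, ch, dh and bh, while with any REX prefix (even the empty 0x40) the same numbers select spl, bpl, sil and dil. Since REX exists only in 64-bit mode, byte forms of those numbers must be rejected unless Gen64Bit(). An illustrative decoder (the tables and names are for demonstration only):

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Which byte register does ModRM number 0..7 select?
    std::string ByteRegName(int num, bool has_rex) {
      static const char* no_rex[] = {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"};
      static const char* with_rex[] = {"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil"};
      assert(num >= 0 && num < 8);
      return has_rex ? with_rex[num] : no_rex[num];
    }

    int main() {
      assert(ByteRegName(4, false) == "ah");   // 32-bit encoding: the wrong register here.
      assert(ByteRegName(4, true) == "spl");   // 64-bit with empty REX, as intended.
      return 0;
    }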
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
-                            uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
+                            int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
   // REX.WRXB
   // W - 64-bit operand
   // R - MODRM.reg
   // X - SIB.index
   // B - MODRM.rm/SIB.base
-  bool force = false;
   bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W);
-  bool r = NeedsRex(reg_r);
-  bool x = NeedsRex(reg_x);
-  bool b = NeedsRex(reg_b);
-  uint8_t rex = force ? 0x40 : 0;
+  bool r = NeedsRex(raw_reg_r);
+  bool x = NeedsRex(raw_reg_x);
+  bool b = NeedsRex(raw_reg_b);
+  bool r8_form = entry->skeleton.r8_form;
+  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
+
+  uint8_t rex = 0;
+  if (r8_form) {
+    // Do we need an empty REX prefix to normalize byte register addressing?
+    if (RegStorage::RegNum(raw_reg_r) >= 4) {
+      rex |= 0x40;  // REX.0000
+    } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
+      rex |= 0x40;  // REX.0000
+    }
+  }
   if (w) {
     rex |= 0x48;  // REX.W000
   }
@@ -718,7 +872,7 @@
   }
   if (entry->skeleton.prefix1 != 0) {
     if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
-      // 64 bit adresses by GS, not FS
+      // In 64-bit, thread addresses use GS rather than FS.
       code_buffer_.push_back(THREAD_PREFIX_GS);
     } else {
       if (entry->skeleton.prefix1 == REX_W) {
@@ -742,6 +896,7 @@
     DCHECK_EQ(0, entry->skeleton.prefix2);
   }
   if (rex != 0) {
+    DCHECK(Gen64Bit());
     code_buffer_.push_back(rex);
   }
 }
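The REX prefix assembled above has the fixed layout 0100WRXB: W selects a 64-bit operand size, and R, X and B extend ModRM.reg, SIB.index and ModRM.rm/SIB.base to reach registers 8-15. A sketch of the same packing (names are illustrative):

    #include <cassert>
    #include <cstdint>

    // Compose a REX prefix from its four flag bits: 0b0100WRXB.
    uint8_t MakeRex(bool w, bool r, bool x, bool b) {
      return 0x40 | (w << 3) | (r << 2) | (x << 1) | (b << 0);
    }

    int main() {
      assert(MakeRex(true, false, false, false) == 0x48);   // REX.W, matching the 0x48 above.
      assert(MakeRex(false, true, false, true) == 0x45);    // REX.RB for two high registers.
      assert(MakeRex(false, false, false, false) == 0x40);  // empty REX for byte registers.
      return 0;
    }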
@@ -761,28 +916,13 @@
   }
 }
 
-void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-}
-
 void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                                     uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
-  EmitPrefix(entry, reg_r, reg_x, reg_b);
+                                     int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
+  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b);
   EmitOpcode(entry);
 }
 
-static uint8_t ModrmForDisp(int base, int disp) {
-  // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
-    return 0;
-  } else if (IS_SIMM8(disp)) {
-    return 1;
-  } else {
-    return 2;
-  }
-}
-
-void X86Mir2Lir::EmitDisp(uint8_t base, int disp) {
+void X86Mir2Lir::EmitDisp(uint8_t base, int32_t disp) {
   // BP requires an explicit disp, so do not omit it in the 0 case
   if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
     return;
@@ -809,13 +949,12 @@
   }
 }
 
-void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) {
-  DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
-  DCHECK_LT(RegStorage::RegNum(base), 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (RegStorage::RegNum(reg_or_opcode) << 3) |
-     RegStorage::RegNum(base);
+void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
+  DCHECK_LT(reg_or_opcode, 8);
+  DCHECK_LT(base, 8);
+  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
   code_buffer_.push_back(modrm);
-  if (RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) {
+  if (base == rs_rX86_SP.GetRegNum()) {
     // Special SIB for SP base
     code_buffer_.push_back(0 << 6 | rs_rX86_SP.GetRegNum() << 3 | rs_rX86_SP.GetRegNum());
   }
@@ -823,7 +962,7 @@
 }
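EmitModrmDisp packs the three ModR/M fields as mod(7:6) | reg(5:3) | rm(2:0). The SP special case above comes from the encoding itself: rm=100 does not mean "base is SP" but "a SIB byte follows", so addressing off the stack pointer needs the extra SIB push. A small sketch (values illustrative):

    #include <cassert>
    #include <cstdint>

    // ModRM packs three fields: mod(7:6), reg/opcode(5:3), rm(2:0).
    uint8_t MakeModrm(uint8_t mod, uint8_t reg, uint8_t rm) {
      assert(mod < 4 && reg < 8 && rm < 8);
      return (mod << 6) | (reg << 3) | rm;
    }

    int main() {
      // mod=1 (disp8), reg=2, base=rm=3 (ebx): 0x53.
      assert(MakeModrm(1, 2, 3) == 0x53);
      // rm=4 (esp) means "a SIB byte follows", not "base = esp", which is why
      // the emitter above pushes the extra SIB when the base is rX86_SP.
      assert(MakeModrm(0, 0, 4) == 0x04);
      return 0;
    }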
 
 void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
-                                  int scale, int disp) {
+                                  int scale, int32_t disp) {
   DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
   uint8_t modrm = (ModrmForDisp(base, disp) << 6) | RegStorage::RegNum(reg_or_opcode) << 3 |
       rs_rX86_SP.GetRegNum();
@@ -848,11 +987,7 @@
       code_buffer_.push_back((imm >> 8) & 0xFF);
       break;
     case 4:
-      if (imm <0) {
-        CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm);
-      } else {
-        CHECK_EQ(imm & 0x0FFFFFFFFl, imm);
-      }
+      DCHECK(IS_SIMM32(imm));
       code_buffer_.push_back(imm & 0xFF);
       code_buffer_.push_back((imm >> 8) & 0xFF);
       code_buffer_.push_back((imm >> 16) & 0xFF);
@@ -875,128 +1010,126 @@
   }
 }
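EmitImm writes immediates low byte first, and the new DCHECK(IS_SIMM32(imm)) replaces the old sign-split range check with a single signed-range assertion. A sketch of the 4-byte case (PushImm32 is a hypothetical stand-in):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Emit a 32-bit immediate in little-endian byte order, as EmitImm does.
    void PushImm32(std::vector<uint8_t>* buf, int32_t imm) {
      for (int shift = 0; shift < 32; shift += 8) {
        buf->push_back((imm >> shift) & 0xFF);
      }
    }

    int main() {
      std::vector<uint8_t> buf;
      PushImm32(&buf, 0x12345678);
      assert(buf.size() == 4);
      assert(buf[0] == 0x78 && buf[1] == 0x56 && buf[2] == 0x34 && buf[3] == 0x12);
      return 0;
    }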
 
-void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  // There's no 3-byte instruction with +rd
-  DCHECK(entry->skeleton.opcode != 0x0F ||
-         (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
-  DCHECK(!RegStorage::IsFloat(reg));
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.back() += RegStorage::RegNum(reg);
+void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
+  // There's no 3-byte instruction with +rd
+  DCHECK(entry->skeleton.opcode != 0x0F ||
+         (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
+  DCHECK(!RegStorage::IsFloat(raw_reg));
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.back() += low_reg;
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
+void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) {
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index,
-                             int scale, int disp) {
-  EmitPrefixAndOpcode(entry, NO_REG, index, base);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
+                             int scale, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) {
-  uint8_t res = reg;
-  res = reg & kRegNumMask32;  // 3 bits
-  return res;
-}
-
-bool X86Mir2Lir::NeedsRex(uint8_t reg) {
-  return RegStorage::RegNum(reg) > 7;
-}
-
-void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry,
-                       uint8_t base, int disp, uint8_t reg) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, base);
-  reg = LowRegisterBits(reg);
-  base = LowRegisterBits(base);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL ||
-           entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM)
-        << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  EmitModrmDisp(reg, base, disp);
+void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                            int32_t raw_reg) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry,
-                       uint8_t reg, uint8_t base, int disp) {
+void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                            int32_t disp) {
   // Opcode will flip operands.
-  EmitMemReg(entry, base, disp, reg);
+  EmitMemReg(entry, raw_base, disp, raw_reg);
 }
 
-void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base,
-                              uint8_t index, int scale, int disp) {
-  EmitPrefixAndOpcode(entry, reg, index, base);
-  reg = LowRegisterBits(reg);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(reg, base, index, scale, disp);
+void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                              int32_t raw_index, int scale, int32_t disp) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(low_reg, low_base, low_index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
-                              int disp, uint8_t reg) {
+void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
+                              int scale, int32_t disp, int32_t raw_reg) {
   // Opcode will flip operands.
-  EmitRegArray(entry, reg, base, index, scale, disp);
+  EmitRegArray(entry, raw_reg, raw_base, raw_index, scale, disp);
 }
 
-void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
-                              int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, index, base);
-  index = LowRegisterBits(index);
-  base = LowRegisterBits(base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                            int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
+void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry,
+                              int32_t raw_base, int32_t raw_index, int scale, int32_t disp,
+                              int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
+  uint8_t low_index = LowRegisterBits(raw_index);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
+void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  EmitModrmThread(RegStorage::RegNum(reg));
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  EmitModrmThread(low_reg);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -1006,79 +1139,68 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) {
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
+  CheckValidByteRegister(entry, raw_reg1);
+  CheckValidByteRegister(entry, raw_reg2);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry,
-                          uint8_t reg1, uint8_t reg2, int32_t imm) {
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
+                               int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
-                                  uint8_t reg1, uint8_t reg2, int32_t imm) {
-  EmitRegRegImm(entry, reg2, reg1, imm);
-}
-
 void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
-                               uint8_t reg, uint8_t base, int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, reg, NO_REG, base);
-  reg = LowRegisterBits(reg);
-  base = LowRegisterBits(base);
-  DCHECK(!RegStorage::IsFloat(reg));
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  EmitModrmDisp(reg, base, disp);
+                               int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) {
+  DCHECK(!RegStorage::IsFloat(raw_reg));
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
 void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry,
-                               uint8_t base, int disp, uint8_t reg, int32_t imm) {
-  EmitRegMemImm(entry, reg, base, disp, imm);
+                               int32_t raw_base, int32_t disp, int32_t raw_reg, int32_t imm) {
+  // Opcode will flip operands.
+  EmitRegMemImm(entry, raw_reg, raw_base, disp, imm);
 }
 
-void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
+void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
+  if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
-    reg = LowRegisterBits(reg);
+    uint8_t low_reg = LowRegisterBits(raw_reg);
     EmitOpcode(entry);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
     code_buffer_.push_back(modrm);
   }
   EmitImm(entry, imm);
 }
 
-void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) {
-  EmitPrefixAndOpcode(entry);
+void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1088,11 +1210,11 @@
   DCHECK_EQ(entry->skeleton.ax_opcode, 0);
 }
 
-void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  reg = LowRegisterBits(reg);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg));
+void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.push_back(0xB8 + low_reg);
   switch (entry->skeleton.immediate_bytes) {
     case 4:
       code_buffer_.push_back(imm & 0xFF);
@@ -1116,9 +1238,9 @@
   }
 }
 
-void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, reg);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1128,13 +1250,8 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  if (RegStorage::RegNum(reg) >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
-        << static_cast<int>(RegStorage::RegNum(reg))
-        << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   if (imm != 1) {
     DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
@@ -1143,40 +1260,40 @@
   }
 }
 
-void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) {
+  CheckValidByteRegister(entry, raw_reg);
+  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
-                                int displacement, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base,
+                                int32_t displacement, int32_t raw_cl) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(base), 8);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, displacement);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base,
-                                int displacement, int imm) {
-  EmitPrefix(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                                 int32_t imm) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1186,7 +1303,8 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   if (imm != 1) {
     DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
     DCHECK(IS_SIMM8(imm));
@@ -1194,23 +1312,26 @@
   }
 }
 
-void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) {
+  CheckValidByteRegister(entry, raw_reg);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
   DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  code_buffer_.push_back(0x90 | condition);
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.push_back(0x90 | cc);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
-void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement,
-                             uint8_t condition) {
+void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
+                             int32_t cc) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
     if (entry->skeleton.prefix2 != 0) {
@@ -1223,61 +1344,63 @@
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
   DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  code_buffer_.push_back(0x90 | condition);
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.push_back(0x90 | cc);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
-void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2,
-                                uint8_t condition) {
-  // Generate prefix and opcode without the condition
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
-  reg1 = LowRegisterBits(reg1);
-  reg2 = LowRegisterBits(reg2);
+void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
+                                int32_t cc) {
+  // Generate prefix and opcode without the condition.
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_LE(condition, 0xF);
-  code_buffer_.back() += condition;
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.back() += cc;
 
-  // Not expecting to have to encode immediate or do anything special for ModR/M since there are two registers.
+  // Not expecting to have to encode immediate or do anything special for ModR/M since there are
+  // two registers.
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
 
-  // Check that registers requested for encoding are sane.
-  DCHECK_LT(RegStorage::RegNum(reg1), 8);
-  DCHECK_LT(RegStorage::RegNum(reg2), 8);
-
   // For register to register encoding, the mod is 3.
   const uint8_t mod = (3 << 6);
 
   // Encode the ModR/M byte now.
-  const uint8_t modrm = mod | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+  const uint8_t modrm = mod | (low_reg1 << 3) | low_reg2;
   code_buffer_.push_back(modrm);
 }
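These conditional emitters rely on x86 folding the condition code into the low nibble of the final opcode byte: SETcc is 0F 90+cc, CMOVcc is 0F 40+cc, and Jcc rel32 is 0F 80+cc, which is why the code simply adds cc to code_buffer_.back(). A sketch:

    #include <cassert>
    #include <cstdint>

    // x86 folds the condition into the low nibble of the last opcode byte.
    // Base opcodes (second byte after 0F): 0x40 CMOVcc, 0x80 Jcc rel32, 0x90 SETcc.
    uint8_t CondOpcode(uint8_t base, uint8_t cc) {
      assert(cc < 16);
      return base + cc;
    }

    int main() {
      assert(CondOpcode(0x90, 0x4) == 0x94);  // sete  (cc 4 = equal/zero)
      assert(CondOpcode(0x40, 0xC) == 0x4C);  // cmovl (cc 12 = less)
      assert(CondOpcode(0x80, 0x5) == 0x85);  // jne rel32 (cc 5 = not equal)
      return 0;
    }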
 
-void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base,
-                                int displacement, uint8_t condition) {
-  // Generate prefix and opcode without the condition
-  EmitPrefixAndOpcode(entry, reg1, NO_REG, base);
-  reg1 = LowRegisterBits(reg1);
-  base = LowRegisterBits(base);
+void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base,
+                                int32_t disp, int32_t cc) {
+  // Generate prefix and opcode without the condition.
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_LE(condition, 0xF);
-  code_buffer_.back() += condition;
+  DCHECK_GE(cc, 0);
+  DCHECK_LT(cc, 16);
+  code_buffer_.back() += cc;
 
+  // Not expecting to have to encode an immediate, and reg1 occupies the ModR/M reg
+  // field, so there is no opcode extension.
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
 
-  // Check that registers requested for encoding are sane.
-  DCHECK_LT(reg1, 8);
-  DCHECK_LT(base, 8);
-
-  EmitModrmDisp(reg1, base, displacement);
+  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(low_reg1, low_base, disp);
 }
 
-void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
+void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) {
   if (entry->opcode == kX86Jmp8) {
     DCHECK(IS_SIMM8(rel));
     code_buffer_.push_back(0xEB);
@@ -1294,17 +1417,17 @@
     code_buffer_.push_back(rel & 0xFF);
   } else {
     DCHECK(entry->opcode == kX86JmpR);
-    uint8_t reg = static_cast<uint8_t>(rel);
-    EmitPrefix(entry, NO_REG, NO_REG, reg);
+    DCHECK_EQ(false, entry->skeleton.r8_form);
+    EmitPrefix(entry, NO_REG, NO_REG, rel);
     code_buffer_.push_back(entry->skeleton.opcode);
-    reg = LowRegisterBits(reg);
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+    uint8_t low_reg = LowRegisterBits(rel);
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
     code_buffer_.push_back(modrm);
   }
 }
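EmitJmp picks between the 2-byte EB rel8 form and the 5-byte E9 rel32 form based on the opcode chosen earlier; when a rel8 later turns out to overflow, the fixup loop further down widens the opcode and retries with kRetryAll. A sketch folding the selection into one helper (hypothetical; the real code dispatches on entry->opcode):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Encode an unconditional relative jump: EB rel8 or E9 rel32.
    void EmitJmpRel(std::vector<uint8_t>* buf, int32_t rel) {
      if (rel >= -128 && rel <= 127) {
        buf->push_back(0xEB);
        buf->push_back(rel & 0xFF);
      } else {
        buf->push_back(0xE9);
        for (int shift = 0; shift < 32; shift += 8) {
          buf->push_back((rel >> shift) & 0xFF);
        }
      }
    }

    int main() {
      std::vector<uint8_t> short_jmp, long_jmp;
      EmitJmpRel(&short_jmp, -2);     // jump to self: EB FE.
      assert(short_jmp.size() == 2 && short_jmp[1] == 0xFE);
      EmitJmpRel(&long_jmp, 0x1000);  // too far for rel8: E9 00 10 00 00.
      assert(long_jmp.size() == 5 && long_jmp[0] == 0xE9);
      return 0;
    }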
 
-void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) {
+void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) {
+  DCHECK_GE(cc, 0);
   DCHECK_LT(cc, 16);
   if (entry->opcode == kX86Jcc8) {
     DCHECK(IS_SIMM8(rel));
@@ -1321,16 +1444,18 @@
   }
 }
 
-void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
-  base = LowRegisterBits(base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
+void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
+  uint8_t low_base = LowRegisterBits(raw_base);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int disp) {
-  EmitPrefixAndOpcode(entry);
+void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   DCHECK_EQ(4, entry->skeleton.immediate_bytes);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1339,9 +1464,10 @@
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
 }
 
-void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
+void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) {
+  DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1351,8 +1477,8 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg,
-                      int base_or_table, uint8_t index, int scale, int table_or_disp) {
+void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
+                           int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
     Mir2Lir::EmbeddedData *tab_rec =
@@ -1361,31 +1487,28 @@
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
     Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table));
+        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table));
     disp = tab_rec->offset;
   }
   if (entry->opcode == kX86PcRelLoadRA) {
-    EmitPrefix(entry, reg, index, base_or_table);
-    reg = LowRegisterBits(reg);
-    base_or_table = LowRegisterBits(base_or_table);
-    index = LowRegisterBits(index);
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
+    DCHECK_EQ(false, entry->skeleton.r8_form);
+    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table);
     code_buffer_.push_back(entry->skeleton.opcode);
     DCHECK_NE(0x0F, entry->skeleton.opcode);
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
     DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-    uint8_t modrm = (2 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rX86_SP.GetRegNum();
+    uint8_t low_reg = LowRegisterBits(raw_reg);
+    uint8_t modrm = (2 << 6) | (low_reg << 3) | rs_rX86_SP.GetRegNum();
     code_buffer_.push_back(modrm);
     DCHECK_LT(scale, 4);
-    DCHECK_LT(RegStorage::RegNum(index), 8);
-    DCHECK_LT(RegStorage::RegNum(base_or_table), 8);
-    uint8_t base = static_cast<uint8_t>(base_or_table);
-    uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base);
+    uint8_t low_base_or_table = LowRegisterBits(raw_base_or_table);
+    uint8_t low_index = LowRegisterBits(raw_index);
+    uint8_t sib = (scale << 6) | (low_index << 3) | low_base_or_table;
     code_buffer_.push_back(sib);
     DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   } else {
-    DCHECK_LT(RegStorage::RegNum(reg), 8);
-    code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg));
+    uint8_t low_reg = LowRegisterBits(raw_reg);
+    code_buffer_.push_back(entry->skeleton.opcode + low_reg);
   }
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1395,27 +1518,27 @@
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
 }
 
-void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) {
-  DCHECK(entry->opcode == kX86StartOfMethod) << entry->name;
-  EmitPrefix(entry, reg, NO_REG, NO_REG);
-  reg = LowRegisterBits(reg);
+void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
+  DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefix(entry, raw_reg, NO_REG, NO_REG);
   code_buffer_.push_back(0xE8);  // call +0
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
 
-  DCHECK_LT(RegStorage::RegNum(reg), 8);
-  code_buffer_.push_back(0x58 + RegStorage::RegNum(reg));  // pop reg
+  uint8_t low_reg = LowRegisterBits(raw_reg);
+  code_buffer_.push_back(0x58 + low_reg);  // pop reg
 
-  EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg),
-             offset + 5 /* size of call +0 */);
+  EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+             raw_reg, offset + 5 /* size of call +0 */);
 }
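EmitMacro materializes the method entry address without RIP-relative addressing: call +0 pushes the address of the next instruction, pop reg retrieves it, and subtracting offset + 5 rewinds past the code emitted so far plus the 5-byte call itself. A sketch of the byte sequence (EmitStartOfMethod is a hypothetical stand-in; the subtract is elided, as the real code delegates it to EmitRegImm):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    void EmitStartOfMethod(std::vector<uint8_t>* buf, uint8_t reg_num, int32_t offset) {
      buf->push_back(0xE8);                    // call +0: pushes address of next insn.
      for (int i = 0; i < 4; ++i) buf->push_back(0);
      buf->push_back(0x58 + reg_num);          // pop reg: reg = address after the call.
      (void)offset;  // the real code then emits `sub reg, offset + 5` via EmitRegImm.
    }

    int main() {
      std::vector<uint8_t> buf;
      EmitStartOfMethod(&buf, 0 /* eax */, 0);
      assert(buf.size() == 6 && buf[5] == 0x58);
      return 0;
    }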
 
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
   UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " "
                          << BuildInsnString(entry->fmt, lir, 0);
-  for (int i = 0; i < GetInsnSize(lir); ++i) {
+  for (size_t i = 0; i < GetInsnSize(lir); ++i) {
     code_buffer_.push_back(0xCC);  // push breakpoint instruction - int 3
   }
 }
@@ -1460,7 +1583,9 @@
                   << " delta: " << delta << " old delta: " << lir->operands[0];
             }
             lir->opcode = kX86Jcc32;
-            SetupResourceMasks(lir);
+            lir->flags.size = GetInsnSize(lir);
+            DCHECK(lir->u.m.def_mask->Equals(kEncodeAll));
+            DCHECK(lir->u.m.use_mask->Equals(kEncodeAll));
             res = kRetryAll;
           }
           if (kVerbosePcFixup) {
@@ -1524,7 +1649,9 @@
               LOG(INFO) << "Retry for JMP growth at " << lir->offset;
             }
             lir->opcode = kX86Jmp32;
-            SetupResourceMasks(lir);
+            lir->flags.size = GetInsnSize(lir);
+            DCHECK(lir->u.m.def_mask->Equals(kEncodeAll));
+            DCHECK(lir->u.m.use_mask->Equals(kEncodeAll));
             res = kRetryAll;
           }
           lir->operands[0] = delta;
@@ -1570,21 +1697,8 @@
       case kData:  // 4 bytes of data
         code_buffer_.push_back(lir->operands[0]);
         break;
-      case kNullary:  // 1 byte of opcode
-        DCHECK_EQ(0, entry->skeleton.prefix1);
-        DCHECK_EQ(0, entry->skeleton.prefix2);
-        EmitOpcode(entry);
-        DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-        DCHECK_EQ(0, entry->skeleton.ax_opcode);
-        DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-        break;
-      case kPrefix2Nullary:  // 1 byte of opcode + 2 prefixes.
-        DCHECK_NE(0, entry->skeleton.prefix1);
-        DCHECK_NE(0, entry->skeleton.prefix2);
-        EmitPrefixAndOpcode(entry);
-        DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-        DCHECK_EQ(0, entry->skeleton.ax_opcode);
-        DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+      case kNullary:  // 1 byte of opcode and possible prefixes.
+        EmitNullary(entry);
         break;
       case kRegOpcode:  // lir operands - 0: reg
         EmitOpRegOpcode(entry, lir->operands[0]);
@@ -1628,17 +1742,17 @@
       case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
         EmitRegReg(entry, lir->operands[1], lir->operands[0]);
         break;
-      case kRegRegImmRev:
-        EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kMemRegImm:
+      case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg, 3: immediate
         EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
-      case kRegRegImm:
+      case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
-      case kRegMemImm:
+      case kRegRegImmStore:   // lir operands - 0: reg2, 1: reg1, 2: imm
+        EmitRegRegImm(entry, lir->operands[1], lir->operands[0], lir->operands[2]);
+        break;
+      case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
         EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
@@ -1711,12 +1825,18 @@
       case kMacro:  // lir operands - 0: reg
         EmitMacro(entry, lir->operands[0], lir->offset);
         break;
-      default:
+      case kNop:  // TODO: these instruction kinds are missing implementations.
+      case kThreadReg:
+      case kRegArrayImm:
+      case kShiftArrayImm:
+      case kShiftArrayCl:
+      case kArrayCond:
+      case kUnimplemented:
         EmitUnimplemented(entry, lir);
         break;
     }
-    CHECK_EQ(static_cast<size_t>(GetInsnSize(lir)),
-             code_buffer_.size() - starting_cbuf_size)
+    DCHECK_EQ(lir->flags.size, GetInsnSize(lir));
+    CHECK_EQ(lir->flags.size, code_buffer_.size() - starting_cbuf_size)
         << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
   }
   return res;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index f363eb3..f5fce34 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -85,11 +86,19 @@
   if (base_of_code_ != nullptr) {
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    rl_method = LoadValue(rl_method, kCoreReg);
+    if (rl_method.wide) {
+      rl_method = LoadValueWide(rl_method, kCoreReg);
+    } else {
+      rl_method = LoadValue(rl_method, kCoreReg);
+    }
     start_of_method_reg = rl_method.reg;
     store_method_addr_used_ = true;
   } else {
-    start_of_method_reg = AllocTemp();
+    if (Gen64Bit()) {
+      start_of_method_reg = AllocTempWide();
+    } else {
+      start_of_method_reg = AllocTemp();
+    }
     NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
   }
   int low_key = s4FromSwitchData(&table[2]);
@@ -107,9 +116,14 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec));
+  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+          2, WrapPointer(tab_rec));
   // Add displacement to start of method
-  OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
+  if (Gen64Bit()) {
+    NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg());
+  } else {
+    OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
+  }
   // ..and go!
   LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
   tab_rec->anchor = switch_branch;
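The generated packed-switch dispatch therefore computes target = start_of_method + table[key - low_key] and jumps through the register; on 64-bit targets the add must be a full 64-bit kX86Add64RR, since the method address no longer fits in 32 bits. Modeled in C++ (types and names are illustrative):

    #include <cassert>
    #include <cstdint>

    // `table` holds per-case displacements relative to the start of the method.
    uintptr_t SwitchTarget(uintptr_t start_of_method, const int32_t* table,
                           int32_t key, int32_t low_key, int32_t size) {
      int32_t index = key - low_key;
      assert(index >= 0 && index < size);     // bounds check emitted earlier
      return start_of_method + table[index];  // 64-bit add in 64-bit codegen
    }

    int main() {
      const int32_t table[] = {0x10, 0x20, 0x30};
      assert(SwitchTarget(0x1000, table, 6, 5, 3) == 0x1020);
      return 0;
    }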
@@ -149,13 +163,18 @@
   if (base_of_code_ != nullptr) {
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    LoadValueDirect(rl_method, rs_rX86_ARG2);
+    if (rl_method.wide) {
+      LoadValueDirectWide(rl_method, rs_rX86_ARG2);
+    } else {
+      LoadValueDirect(rl_method, rs_rX86_ARG2);
+    }
     store_method_addr_used_ = true;
   } else {
+    // TODO(64) force to be 64-bit
     NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg());
   }
   NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
-  NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
+  NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
   if (Is64BitInstructionSet(cu_->instruction_set)) {
     CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
                             rs_rX86_ARG1, true);
@@ -263,9 +282,10 @@
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
     }
     LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch,
-                                                 frame_size_ -
-                                                 GetInstructionSetPointerSize(cu_->instruction_set)));
+    AddSlowPath(
+        new(arena_)StackOverflowSlowPath(this, branch,
+                                         frame_size_ -
+                                         GetInstructionSetPointerSize(cu_->instruction_set)));
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -275,7 +295,7 @@
     setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg());
     int displacement = SRegOffset(base_of_code_->s_reg_low);
     // Native pointer - must be natural word size.
-    setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0);
+    setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0,
+                                             Gen64Bit() ? k64 : k32);
   }
 
   FreeTemp(rs_rX86_ARG0);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 648c148..3540843 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -20,780 +20,832 @@
 #include "dex/compiler_internals.h"
 #include "x86_lir.h"
 
+#include <map>
+
 namespace art {
 
 class X86Mir2Lir : public Mir2Lir {
-  public:
-    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
+ protected:
+  class InToRegStorageMapper {
+   public:
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual ~InToRegStorageMapper() {}
+  };
 
-    // Required for target - codegen helpers.
-    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                            RegLocation rl_dest, int lit);
-    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-    LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
-    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size) OVERRIDE;
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                      OpSize size) OVERRIDE;
-    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size) OVERRIDE;
-    LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, OpSize size) OVERRIDE;
-    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
-    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
-                               OpSize size) OVERRIDE;
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+  class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
+   public:
+    InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+    virtual ~InToRegStorageX86_64Mapper() {}
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+   private:
+    int cur_core_reg_;
+    int cur_fp_reg_;
+  };
+
+  class InToRegStorageMapping {
+   public:
+    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
+                              initialized_(false) {}
+    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+    int GetMaxMappedIn() { return max_mapped_in_; }
+    bool IsThereStackMapped() { return is_there_stack_mapped_; }
+    RegStorage Get(int in_position);
+    bool IsInitialized() { return initialized_; }
+   private:
+    std::map<int, RegStorage> mapping_;
+    int max_mapped_in_;
+    bool is_there_stack_mapped_;
+    bool initialized_;
+  };
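From the declarations alone, GetNextReg plausibly walks two independent sequences, core and floating-point, in x86-64 calling-convention style, handing back an invalid storage once a sequence is exhausted so the argument falls to the stack. A speculative sketch of that behavior (register identities and the -1 stack sentinel are assumptions, not the real RegStorage values):

    #include <cassert>

    struct Mapper {
      int cur_core = 0, cur_fp = 0;
      int GetNextReg(bool is_fp) {
        static const int core[] = {1 /* RDI */, 2 /* RSI */, 3 /* RDX */};
        static const int fp[] = {10 /* XMM0 */, 11 /* XMM1 */};
        if (is_fp) return cur_fp < 2 ? fp[cur_fp++] : -1;  // -1 means stack-mapped
        return cur_core < 3 ? core[cur_core++] : -1;
      }
    };

    int main() {
      Mapper m;
      assert(m.GetNextReg(false) == 1);  // first core arg
      assert(m.GetNextReg(true) == 10);  // first FP arg, independent counter
      assert(m.GetNextReg(false) == 2);  // second core arg
      return 0;
    }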
+
+ public:
+  X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
+
+  // Required for target - codegen helpers.
+  bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                          RegLocation rl_dest, int lit);
+  bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+  LIR* CheckSuspendUsingLoad() OVERRIDE;
+  RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+  RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+  LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                            OpSize size) OVERRIDE;
+  LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                    OpSize size) OVERRIDE;
+  LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
                        OpSize size) OVERRIDE;
-    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size) OVERRIDE;
-    LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, OpSize size) OVERRIDE;
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                           RegStorage r_dest, OpSize size) OVERRIDE;
+  LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
+  LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+  LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                             OpSize size) OVERRIDE;
+  LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                     OpSize size) OVERRIDE;
+  LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
+                        OpSize size) OVERRIDE;
+  LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                            RegStorage r_src, OpSize size) OVERRIDE;
+  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
-    // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg);
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
-    RegLocation GetReturnAlt();
-    RegLocation GetReturnWideAlt();
-    RegLocation LocCReturn();
-    RegLocation LocCReturnRef();
-    RegLocation LocCReturnDouble();
-    RegLocation LocCReturnFloat();
-    RegLocation LocCReturnWide();
-    uint64_t GetRegMaskCommon(RegStorage reg);
-    void AdjustSpillMask();
-    void ClobberCallerSave();
-    void FreeCallTemps();
-    void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
-    void CompilerInitializeRegAlloc();
+  // Required for target - register utilities.
+  RegStorage TargetReg(SpecialTargetRegister reg);
+  RegStorage GetArgMappingToPhysicalReg(int arg_num);
+  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
+  RegLocation GetReturnAlt();
+  RegLocation GetReturnWideAlt();
+  RegLocation LocCReturn();
+  RegLocation LocCReturnRef();
+  RegLocation LocCReturnDouble();
+  RegLocation LocCReturnFloat();
+  RegLocation LocCReturnWide();
+  ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
+  void AdjustSpillMask();
+  void ClobberCallerSave();
+  void FreeCallTemps();
+  void LockCallTemps();
+  void MarkPreservedSingle(int v_reg, RegStorage reg);
+  void MarkPreservedDouble(int v_reg, RegStorage reg);
+  void CompilerInitializeRegAlloc();
 
-    // Required for target - miscellaneous.
-    void AssembleLIR();
-    int AssignInsnOffsets();
-    void AssignOffsets();
-    AssemblerStatus AssembleInstructions(CodeOffset start_addr);
-    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
-    const char* GetTargetInstFmt(int opcode);
-    const char* GetTargetInstName(int opcode);
-    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    uint64_t GetPCUseDefEncoding();
-    uint64_t GetTargetInstFlags(int opcode);
-    int GetInsnSize(LIR* lir);
-    bool IsUnconditionalBranch(LIR* lir);
+  // Required for target - miscellaneous.
+  void AssembleLIR();
+  int AssignInsnOffsets();
+  void AssignOffsets();
+  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+  void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
+  void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
+  const char* GetTargetInstFmt(int opcode);
+  const char* GetTargetInstName(int opcode);
+  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+  ResourceMask GetPCUseDefEncoding() const OVERRIDE;
+  uint64_t GetTargetInstFlags(int opcode);
+  size_t GetInsnSize(LIR* lir) OVERRIDE;
+  bool IsUnconditionalBranch(LIR* lir);
 
-    // Check support for volatile load/store of a given size.
-    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
-    // Get the register class for load/store of a field.
-    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+  // Check support for volatile load/store of a given size.
+  bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+  // Get the register class for load/store of a field.
+  RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
-    // Required for target - Dalvik-level generators.
-    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                           RegLocation rl_src2);
-    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                     RegLocation rl_dest, int scale);
-    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
-    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                          RegLocation rl_src2);
-    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+  // Required for target - Dalvik-level generators.
+  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                          RegLocation rl_src2);
-    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+  void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                   RegLocation rl_dest, int scale);
+  void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
+                   RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
+  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                         RegLocation rl_src1, RegLocation rl_shift);
+  void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
-    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
-    bool GenInlinedSqrt(CallInfo* info);
-    bool GenInlinedPeek(CallInfo* info, OpSize size);
-    bool GenInlinedPoke(CallInfo* info, OpSize size);
-    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                   RegLocation rl_src2);
-    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2, bool is_div);
-    // TODO: collapse reg_lo, reg_hi
-    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
-    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenDivZeroCheckWide(RegStorage reg);
-    void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
-    void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
-    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
-    void GenExitSequence();
-    void GenSpecialExitSequence();
-    void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
-    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
-    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-    void GenSelect(BasicBlock* bb, MIR* mir);
-    bool GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMoveException(RegLocation rl_dest);
-    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                       int first_bit, int second_bit);
-    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
-    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+  void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2);
+  void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2);
+  void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                RegLocation rl_src2);
+  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
+  bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+  bool GenInlinedSqrt(CallInfo* info);
+  bool GenInlinedPeek(CallInfo* info, OpSize size);
+  bool GenInlinedPoke(CallInfo* info, OpSize size);
+  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
+  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+  void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                 RegLocation rl_src2);
+  void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+  void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                     RegLocation rl_src2, bool is_div);
+  // TODO: collapse reg_lo, reg_hi
+  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
+  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+  void GenDivZeroCheckWide(RegStorage reg);
+  void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
+  void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
+  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
+  void GenExitSequence();
+  void GenSpecialExitSequence();
+  void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
+  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
+  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
+  void GenSelect(BasicBlock* bb, MIR* mir);
+  bool GenMemBarrier(MemBarrierKind barrier_kind);
+  void GenMoveException(RegLocation rl_dest);
+  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                     int first_bit, int second_bit);
+  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
+  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
+  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
 
-    /*
-     * @brief Generate a two address long operation with a constant value
-     * @param rl_dest location of result
-     * @param rl_src constant source operand
-     * @param op Opcode to be generated
-     */
-    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
-    /*
-     * @brief Generate a three address long operation with a constant value
-     * @param rl_dest location of result
-     * @param rl_src1 source operand
-     * @param rl_src2 constant source operand
-     * @param op Opcode to be generated
-     */
-    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        Instruction::Code op);
+  /*
+   * @brief Generate a two address long operation with a constant value
+   * @param rl_dest location of result
+   * @param rl_src constant source operand
+   * @param op Opcode to be generated
+   * @return 'true' if the operation was handled, 'false' otherwise
+   */
+  bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /*
+   * @brief Generate a three address long operation with a constant value
+   * @param rl_dest location of result
+   * @param rl_src1 source operand
+   * @param rl_src2 constant source operand
+   * @param op Opcode to be generated
+   * @return 'true' if the operation was handled, 'false' otherwise
+   */
+  bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                      Instruction::Code op);
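+
+  // Assumed caller pattern (illustrative sketch only; the concrete fallback
+  // routine is a guess, not part of this declaration):
+  //   if (!GenLongLongImm(rl_dest, rl_src1, rl_src2, op)) {
+  //     GenLongArith(rl_dest, rl_src1, rl_src2, op, /* is_commutative */ false);
+  //   }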
 
-    /**
-      * @brief Generate a long arithmetic operation.
-      * @param rl_dest The destination.
-      * @param rl_src1 First operand.
-      * @param rl_src2 Second operand.
-      * @param op The DEX opcode for the operation.
-      * @param is_commutative The sources can be swapped if needed.
-      */
-    virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                              Instruction::Code op, bool is_commutative);
+  /**
+   * @brief Generate a long arithmetic operation.
+   * @param rl_dest The destination.
+   * @param rl_src1 First operand.
+   * @param rl_src2 Second operand.
+   * @param op The DEX opcode for the operation.
+   * @param is_commutative The sources can be swapped if needed.
+   */
+  virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                            Instruction::Code op, bool is_commutative);
 
-    /**
-      * @brief Generate a two operand long arithmetic operation.
-      * @param rl_dest The destination.
-      * @param rl_src Second operand.
-      * @param op The DEX opcode for the operation.
-      */
-    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /**
+   * @brief Generate a two operand long arithmetic operation.
+   * @param rl_dest The destination.
+   * @param rl_src Second operand.
+   * @param op The DEX opcode for the operation.
+   */
+  void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
-    /**
-      * @brief Generate a long operation.
-      * @param rl_dest The destination.  Must be in a register
-      * @param rl_src The other operand.  May be in a register or in memory.
-      * @param op The DEX opcode for the operation.
-      */
-    virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+  /**
+   * @brief Generate a long operation.
+   * @param rl_dest The destination.  Must be in a register
+   * @param rl_src The other operand.  May be in a register or in memory.
+   * @param op The DEX opcode for the operation.
+   */
+  virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
-    /**
-     * @brief Implement instanceof a final class with x86 specific code.
-     * @param use_declaring_class 'true' if we can use the class itself.
-     * @param type_idx Type index to use if use_declaring_class is 'false'.
-     * @param rl_dest Result to be set to 0 or 1.
-     * @param rl_src Object to be tested.
-     */
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
-    /*
-     *
-     * @brief Implement Set up instanceof a class with x86 specific code.
-     * @param needs_access_check 'true' if we must check the access.
-     * @param type_known_final 'true' if the type is known to be a final class.
-     * @param type_known_abstract 'true' if the type is known to be an abstract class.
-     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
-     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
-     * @param type_idx Type index to use if use_declaring_class is 'false'.
-     * @param rl_dest Result to be set to 0 or 1.
-     * @param rl_src Object to be tested.
-     */
-    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                    bool type_known_abstract, bool use_declaring_class,
-                                    bool can_assume_type_is_in_dex_cache,
-                                    uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
+  /**
+   * @brief Implement instanceof a final class with x86 specific code.
+   * @param use_declaring_class 'true' if we can use the class itself.
+   * @param type_idx Type index to use if use_declaring_class is 'false'.
+   * @param rl_dest Result to be set to 0 or 1.
+   * @param rl_src Object to be tested.
+   */
+  void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
+                          RegLocation rl_src);
+  /*
+   * @brief Set up instanceof for a class with x86 specific code.
+   * @param needs_access_check 'true' if we must check the access.
+   * @param type_known_final 'true' if the type is known to be a final class.
+   * @param type_known_abstract 'true' if the type is known to be an abstract class.
+   * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
+   * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
+   * @param type_idx Type index to use if use_declaring_class is 'false'.
+   * @param rl_dest Result to be set to 0 or 1.
+   * @param rl_src Object to be tested.
+   */
+  void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                  bool type_known_abstract, bool use_declaring_class,
+                                  bool can_assume_type_is_in_dex_cache,
+                                  uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
 
-    // Single operation generators.
-    LIR* OpUnconditionalBranch(LIR* target);
-    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
-    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
-    LIR* OpCondBranch(ConditionCode cc, LIR* target);
-    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
-    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpIT(ConditionCode cond, const char* guide);
-    void OpEndIT(LIR* it);
-    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
-    LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
-    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
-    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
-    LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
-    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
-    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
-    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
-    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
-    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
-    LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
-    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
-    LIR* OpVldm(RegStorage r_base, int count);
-    LIR* OpVstm(RegStorage r_base, int count);
-    void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
-    void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
-    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
+  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                      RegLocation rl_src1, RegLocation rl_shift);
 
-    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
-    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
-    void SpillCoreRegs();
-    void UnSpillCoreRegs();
-    static const X86EncodingMap EncodingMap[kX86Last];
-    bool InexpensiveConstantInt(int32_t value);
-    bool InexpensiveConstantFloat(int32_t value);
-    bool InexpensiveConstantLong(int64_t value);
-    bool InexpensiveConstantDouble(int64_t value);
+  // Single operation generators.
+  LIR* OpUnconditionalBranch(LIR* target);
+  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
+  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
+  LIR* OpCondBranch(ConditionCode cc, LIR* target);
+  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
+  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
+  LIR* OpIT(ConditionCode cond, const char* guide);
+  void OpEndIT(LIR* it);
+  LIR* OpMem(OpKind op, RegStorage r_base, int disp);
+  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+  LIR* OpReg(OpKind op, RegStorage r_dest_src);
+  void OpRegCopy(RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
+  LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
+  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
+  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
+  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
+  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
+  LIR* OpTestSuspend(LIR* target);
+  LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+  LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
+  LIR* OpVldm(RegStorage r_base, int count);
+  LIR* OpVstm(RegStorage r_base, int count);
+  void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+  void OpRegCopyWide(RegStorage dest, RegStorage src);
+  void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+  void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
 
-    /*
-     * @brief Should try to optimize for two address instructions?
-     * @return true if we try to avoid generating three operand instructions.
-     */
-    virtual bool GenerateTwoOperandInstructions() const { return true; }
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
+  void SpillCoreRegs();
+  void UnSpillCoreRegs();
+  static const X86EncodingMap EncodingMap[kX86Last];
+  bool InexpensiveConstantInt(int32_t value);
+  bool InexpensiveConstantFloat(int32_t value);
+  bool InexpensiveConstantLong(int64_t value);
+  bool InexpensiveConstantDouble(int64_t value);
 
-    /*
-     * @brief x86 specific codegen for int operations.
-     * @param opcode Operation to perform.
-     * @param rl_dest Destination for the result.
-     * @param rl_lhs Left hand operand.
-     * @param rl_rhs Right hand operand.
-     */
-    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
-                       RegLocation rl_rhs);
+  /*
+   * @brief Should try to optimize for two address instructions?
+   * @return true if we try to avoid generating three operand instructions.
+   */
+  virtual bool GenerateTwoOperandInstructions() const { return true; }
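+
+  // Illustrative contrast: a three-operand form such as "add r0, r1, r2"
+  // (r0 = r1 + r2) has no direct x86 encoding; it lowers to a copy plus a
+  // two-address op, e.g. "mov eax, ecx" then "add eax, edx", because x86
+  // ALU instructions overwrite their first operand.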
 
-    /*
-     * @brief Dump a RegLocation using printf
-     * @param loc Register location to dump
-     */
-    static void DumpRegLocation(RegLocation loc);
+  /*
+   * @brief x86 specific codegen for int operations.
+   * @param opcode Operation to perform.
+   * @param rl_dest Destination for the result.
+   * @param rl_lhs Left hand operand.
+   * @param rl_rhs Right hand operand.
+   */
+  void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
+                     RegLocation rl_rhs);
 
-    /*
-     * @brief Load the Method* of a dex method into the register.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @param register that will contain the code address.
-     * @note register will be passed to TargetReg to get physical register.
-     */
-    void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                           SpecialTargetRegister symbolic_reg);
+  /*
+   * @brief Dump a RegLocation using printf
+   * @param loc Register location to dump
+   */
+  static void DumpRegLocation(RegLocation loc);
 
-    /*
-     * @brief Load the Class* of a Dex Class type into the register.
-     * @param type How the method will be invoked.
-     * @param register that will contain the code address.
-     * @note register will be passed to TargetReg to get physical register.
-     */
-    void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+  /*
+   * @brief Load the Method* of a dex method into the register.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @param register that will contain the code address.
+   * @note register will be passed to TargetReg to get physical register.
+   */
+  void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
+                         SpecialTargetRegister symbolic_reg);
 
-    /*
-     * @brief Generate a relative call to the method that will be patched at link time.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @returns Call instruction
-     */
-    virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+  /*
+   * @brief Load the Class* of a Dex Class type into the register.
+   * @param type How the method will be invoked.
+   * @param register that will contain the code address.
+   * @note register will be passed to TargetReg to get physical register.
+   */
+  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
 
-    /*
-     * @brief Handle x86 specific literals
-     */
-    void InstallLiteralPools();
+  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
 
-    /*
-     * @brief Generate the debug_frame CFI information.
-     * @returns pointer to vector containing CFE information
-     */
-    static std::vector<uint8_t>* ReturnCommonCallFrameInformation();
+  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                           NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx,
+                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                           bool skip_this);
 
-    /*
-     * @brief Generate the debug_frame FDE information.
-     * @returns pointer to vector containing CFE information
-     */
-    std::vector<uint8_t>* ReturnCallFrameInformation();
+  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                         NextCallInsn next_call_insn,
+                         const MethodReference& target_method,
+                         uint32_t vtable_idx,
+                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                         bool skip_this);
 
-  protected:
-    size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement,
-                       int reg_r, int reg_x, bool has_sib);
-    uint8_t LowRegisterBits(uint8_t reg);
-    bool NeedsRex(uint8_t reg);
-    void EmitPrefix(const X86EncodingMap* entry);
-    void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
-    void EmitOpcode(const X86EncodingMap* entry);
-    void EmitPrefixAndOpcode(const X86EncodingMap* entry);
-    void EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                             uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
-    void EmitDisp(uint8_t base, int disp);
-    void EmitModrmThread(uint8_t reg_or_opcode);
-    void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
-    void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp);
-    void EmitImm(const X86EncodingMap* entry, int64_t imm);
-    void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg);
-    void EmitOpReg(const X86EncodingMap* entry, uint8_t reg);
-    void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp);
-    void EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp);
-    void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg);
-    void EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm);
-    void EmitRegMem(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp);
-    void EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index,
-                      int scale, int disp);
-    void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
-                      uint8_t reg);
-    void EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
-                      int32_t imm);
-    void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
-    void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
-    void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
-    void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
-    void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp,
-                       int32_t imm);
-    void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm);
-    void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
-    void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
-    void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm);
-    void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
-    void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm);
-    void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
-    void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
-    void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
-    void EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition);
+  /*
+   * @brief Generate a relative call to the method that will be patched at link time.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @returns Call instruction
+   */
+  virtual LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
 
-    /**
-     * @brief Used for encoding conditional register to register operation.
-     * @param entry The entry in the encoding map for the opcode.
-     * @param reg1 The first physical register.
-     * @param reg2 The second physical register.
-     * @param condition The condition code for operation.
-     */
-    void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition);
+  /*
+   * @brief Handle x86 specific literals
+   */
+  void InstallLiteralPools();
 
-    /**
-     * @brief Used for encoding conditional register to memory operation.
-     * @param entry The entry in the encoding map for the opcode.
-     * @param reg1 The first physical register.
-     * @param base The memory base register.
-     * @param displacement The memory displacement.
-     * @param condition The condition code for operation.
-     */
-    void EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition);
+  /*
+   * @brief Generate the debug_frame CFI information.
+   * @returns pointer to vector containing CFI information
+   */
+  static std::vector<uint8_t>* ReturnCommonCallFrameInformation();
 
-    void EmitJmp(const X86EncodingMap* entry, int rel);
-    void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
-    void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
-    void EmitCallImmediate(const X86EncodingMap* entry, int disp);
-    void EmitCallThread(const X86EncodingMap* entry, int disp);
-    void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index,
-                   int scale, int table_or_disp);
-    void EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset);
-    void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
-    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                  int64_t val, ConditionCode ccode);
-    void GenConstWide(RegLocation rl_dest, int64_t value);
+  /*
+   * @brief Generate the debug_frame FDE information.
+   * @returns pointer to vector containing FDE information
+   */
+  std::vector<uint8_t>* ReturnCallFrameInformation();
 
-    static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
+ protected:
+  size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
+                     int32_t raw_base, int32_t displacement);
+  void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitPrefix(const X86EncodingMap* entry,
+                  int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b);
+  void EmitOpcode(const X86EncodingMap* entry);
+  void EmitPrefixAndOpcode(const X86EncodingMap* entry,
+                           int32_t reg_r, int32_t reg_x, int32_t reg_b);
+  void EmitDisp(uint8_t base, int32_t disp);
+  void EmitModrmThread(uint8_t reg_or_opcode);
+  void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp);
+  void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale,
+                        int32_t disp);
+  void EmitImm(const X86EncodingMap* entry, int64_t imm);
+  void EmitNullary(const X86EncodingMap* entry);
+  void EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg);
+  void EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
+  void EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                   int32_t disp);
+  void EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg);
+  void EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t disp);
+  void EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
+                    int32_t raw_index, int scale, int32_t disp);
+  void EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                    int32_t disp, int32_t raw_reg);
+  void EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
+  void EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
+                    int32_t raw_disp, int32_t imm);
+  void EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp);
+  void EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2);
+  void EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm);
+  void EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
+                     int32_t imm);
+  void EmitMemRegImm(const X86EncodingMap* entry, int32_t base, int32_t disp, int32_t raw_reg1,
+                     int32_t imm);
+  void EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
+  void EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm);
+  void EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm);
+  void EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
+  void EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl);
+  void EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_cl);
+  void EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
+  void EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc);
+  void EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t cc);
+  void EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t cc);
+  void EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
+                      int32_t cc);
 
-    /*
-     * @brief Ensure that a temporary register is byte addressable.
-     * @returns a temporary guarenteed to be byte addressable.
-     */
-    virtual RegStorage AllocateByteRegister();
+  void EmitJmp(const X86EncodingMap* entry, int32_t rel);
+  void EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc);
+  void EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
+  void EmitCallImmediate(const X86EncodingMap* entry, int32_t disp);
+  void EmitCallThread(const X86EncodingMap* entry, int32_t disp);
+  void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
+                 int32_t raw_index, int scale, int32_t table_or_disp);
+  void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset);
+  void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
+  void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                int64_t val, ConditionCode ccode);
+  void GenConstWide(RegLocation rl_dest, int64_t value);
 
-    /*
-     * @brief generate inline code for fast case of Strng.indexOf.
-     * @param info Call parameters
-     * @param zero_based 'true' if the index into the string is 0.
-     * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
-     * generated.
-     */
-    bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+  static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
 
-    /*
-     * @brief Load 128 bit constant into vector register.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector
-     * @note vA is the TypeSize for the register.
-     * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
-     */
-    void GenConst128(BasicBlock* bb, MIR* mir);
+  /*
+   * @brief Ensure that a temporary register is byte addressable.
+   * @returns a temporary guaranteed to be byte addressable.
+   */
+  virtual RegStorage AllocateByteRegister();
+
+  /*
+   * @brief Check if a register is byte addressable.
+   * @returns true if a register is byte addressable.
+   */
+  bool IsByteRegister(RegStorage reg);
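+
+  // Background (x86-32 architectural fact, not specific to this change):
+  // only EAX/ECX/EDX/EBX expose 8-bit low forms (AL/CL/DL/BL); ESI/EDI/
+  // EBP/ESP have no byte form without the x86-64 REX prefix, hence the
+  // dedicated byte-register check and allocator above.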
+
+  /*
+   * @brief Generate inline code for the fast case of String.indexOf.
+   * @param info Call parameters
+   * @param zero_based 'true' if the index into the string is 0.
+   * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
+   * generated.
+   */
+  bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+
+  /*
+   * @brief Load 128 bit constant into vector register.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector
+   * @note vA is the TypeSize for the register.
+   * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
+   */
+  void GenConst128(BasicBlock* bb, MIR* mir);
+
+  /*
+   * @brief MIR to move a vectorized register to another.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination
+   * @note vC: source
+   */
+  void GenMoveVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief MIR to move a vectorized register to another.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination
-     * @note vC: source
-     */
-    void GenMoveVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenMultiplyVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenMultiplyVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenAddVector(BasicBlock *bb, MIR *mir);
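+
+  // Illustrative lowering (assumed SSE2 mapping, for intuition only): with
+  // vA selecting 32-bit lanes, vB = vB .+ vC corresponds to
+  // "paddd xmm<vB>, xmm<vC>", a lane-wise addition of four 32-bit values.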
 
-    /*
-     * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenAddVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenSubtractVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenSubtractVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: immediate
+   */
+  void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from..
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: immediate
-     */
-    void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenAndVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenAndVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenOrVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenOrVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination and source
+   * @note vC: source
+   */
+  void GenXorVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination and source
-     * @note vC: source
-     */
-    void GenXorVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
+   * @note vA: TypeSize
+   * @note vB: destination and source VR (not vector register)
+   * @note vC: source (vector register)
+   */
+  void GenAddReduceVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
-     * @note vA: TypeSize
-     * @note vB: destination and source VR (not vector register)
-     * @note vC: source (vector register)
-     */
-    void GenAddReduceVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Extract a packed element into a single VR.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize
+   * @note vB: destination VR (not vector register)
+   * @note vC: source (vector register)
+   * @note arg[0]: The index to use for extraction from vector register (which packed element).
+   */
+  void GenReduceVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Extract a packed element into a single VR.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize
-     * @note vB: destination VR (not vector register)
-     * @note vC: source (vector register)
-     * @note arg[0]: The index to use for extraction from vector register (which packed element).
-     */
-    void GenReduceVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Create a vector value, with all TypeSize values equal to vC
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector.
+   * @note vA: TypeSize.
+   * @note vB: destination vector register.
+   * @note vC: source VR (not vector register).
+   */
+  void GenSetVector(BasicBlock *bb, MIR *mir);
 
-    /*
-     * @brief Create a vector value, with all TypeSize values equal to vC
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirConstVector.
-     * @note vA: TypeSize.
-     * @note vB: destination vector register.
-     * @note vC: source VR (not vector register).
-     */
-    void GenSetVector(BasicBlock *bb, MIR *mir);
+  /*
+   * @brief Generate code for a vector opcode.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is a non-standard opcode.
+   */
+  void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
 
-    /*
-     * @brief Generate code for a vector opcode.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is a non-standard opcode.
-     */
-    void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+  /*
+   * @brief Return the correct x86 opcode for the Dex operation
+   * @param op Dex opcode for the operation
+   * @param loc Register location of the operand
+   * @param is_high_op 'true' if this is an operation on the high word
+   * @param value Immediate value for the operation.  Used for byte variants
+   * @returns the correct x86 opcode to perform the operation
+   */
+  X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
 
-    /*
-     * @brief Return the correct x86 opcode for the Dex operation
-     * @param op Dex opcode for the operation
-     * @param loc Register location of the operand
-     * @param is_high_op 'true' if this is an operation on the high word
-     * @param value Immediate value for the operation.  Used for byte variants
-     * @returns the correct x86 opcode to perform the operation
-     */
-    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+  /*
+   * @brief Return the correct x86 opcode for the Dex operation
+   * @param op Dex opcode for the operation
+   * @param dest location of the destination.  May be register or memory.
+   * @param rhs Location for the rhs of the operation.  May be in register or memory.
+   * @param is_high_op 'true' if this is an operation on the high word
+   * @returns the correct x86 opcode to perform the operation
+   * @note at most one location may refer to memory
+   */
+  X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                      bool is_high_op);
 
-    /*
-     * @brief Return the correct x86 opcode for the Dex operation
-     * @param op Dex opcode for the operation
-     * @param dest location of the destination.  May be register or memory.
-     * @param rhs Location for the rhs of the operation.  May be in register or memory.
-     * @param is_high_op 'true' if this is an operation on the high word
-     * @returns the correct x86 opcode to perform the operation
-     * @note at most one location may refer to memory
-     */
-    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
-                        bool is_high_op);
+  /*
+   * @brief Is this operation a no-op for this opcode and value
+   * @param op Dex opcode for the operation
+   * @param value Immediate value for the operation.
+   * @returns 'true' if the operation will have no effect
+   */
+  bool IsNoOp(Instruction::Code op, int32_t value);
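+
+  // Plausible no-op cases (illustrative, not exhaustive): ADD/SUB of 0,
+  // OR/XOR with 0, and shifts by 0 all leave the destination unchanged.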
 
-    /*
-     * @brief Is this operation a no-op for this opcode and value
-     * @param op Dex opcode for the operation
-     * @param value Immediate value for the operation.
-     * @returns 'true' if the operation will have no effect
-     */
-    bool IsNoOp(Instruction::Code op, int32_t value);
+  /**
+   * @brief Calculate magic number and shift for a given divisor
+   * @param divisor the divisor for the calculation
+   * @param magic out-parameter receiving the calculated magic number
+   * @param shift out-parameter receiving the calculated shift
+   */
+  void CalculateMagicAndShift(int divisor, int& magic, int& shift);
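+
+  // Worked example (standard magic-number division, cf. Hacker's Delight):
+  // for a signed 32-bit divide by 7, magic = 0x92492493 and shift = 2, so
+  //   q = hi32(n * magic); q += n; q >>= 2; q += (uint32_t)q >> 31;
+  // e.g. n = -6 yields q = 0, matching truncated division.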
 
-    /**
-     * @brief Calculate magic number and shift for a given divisor
-     * @param divisor divisor number for calculation
-     * @param magic hold calculated magic number
-     * @param shift hold calculated shift
-     */
-    void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+  /*
+   * @brief Generate an integer div or rem operation.
+   * @param rl_dest Destination Location.
+   * @param rl_src1 Numerator Location.
+   * @param rl_src2 Divisor Location.
+   * @param is_div 'true' if this is a division, 'false' for a remainder.
+   * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+   */
+  RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                        bool is_div, bool check_zero);
 
-    /*
-     * @brief Generate an integer div or rem operation.
-     * @param rl_dest Destination Location.
-     * @param rl_src1 Numerator Location.
-     * @param rl_src2 Divisor Location.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
-     */
-    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                          bool is_div, bool check_zero);
+  /*
+   * @brief Generate an integer div or rem operation by a literal.
+   * @param rl_dest Destination Location.
+   * @param rl_src Numerator Location.
+   * @param lit Divisor.
+   * @param is_div 'true' if this is a division, 'false' for a remainder.
+   */
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
 
-    /*
-     * @brief Generate an integer div or rem operation by a literal.
-     * @param rl_dest Destination Location.
-     * @param rl_src Numerator Location.
-     * @param lit Divisor.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     */
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+  /*
+   * Generate code to implement long shift operations.
+   * @param opcode The DEX opcode to specify the shift type.
+   * @param rl_dest The destination.
+   * @param rl_src The value to be shifted.
+   * @param shift_amount How much to shift.
+   * @returns the RegLocation of the result.
+   */
+  RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                RegLocation rl_src, int shift_amount);
+  /*
+   * Generate an imul of a register by a constant or a better sequence.
+   * @param dest Destination Register.
+   * @param src Source Register.
+   * @param val Constant multiplier.
+   */
+  void GenImulRegImm(RegStorage dest, RegStorage src, int val);
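+
+  // Example of a "better sequence" (illustrative): a multiply by 10 can be
+  // emitted as "lea r, [r + r*4]" (x5) followed by "add r, r" (x2), which
+  // is typically cheaper than an imul.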
 
-    /*
-     * Generate code to implement long shift operations.
-     * @param opcode The DEX opcode to specify the shift type.
-     * @param rl_dest The destination.
-     * @param rl_src The value to be shifted.
-     * @param shift_amount How much to shift.
-     * @returns the RegLocation of the result.
-     */
-    RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                  RegLocation rl_src, int shift_amount);
-    /*
-     * Generate an imul of a register by a constant or a better sequence.
-     * @param dest Destination Register.
-     * @param src Source Register.
-     * @param val Constant multiplier.
-     */
-    void GenImulRegImm(RegStorage dest, RegStorage src, int val);
+  /*
+   * Generate an imul of a memory location by a constant or a better sequence.
+   * @param dest Destination Register.
+   * @param sreg Symbolic register.
+   * @param displacement Displacement on stack of Symbolic Register.
+   * @param val Constant multiplier.
+   */
+  void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val);
 
-    /*
-     * Generate an imul of a memory location by a constant or a better sequence.
-     * @param dest Destination Register.
-     * @param sreg Symbolic register.
-     * @param displacement Displacement on stack of Symbolic Register.
-     * @param val Constant multiplier.
-     */
-    void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val);
+  /*
+   * @brief Compare memory to immediate, and branch if condition true.
+   * @param cond The condition code that when true will branch to the target.
+   * @param temp_reg A temporary register that can be used if compare memory is not
+   * supported by the architecture.
+   * @param base_reg The register holding the base address.
+   * @param offset The offset from the base.
+   * @param check_value The immediate to compare to.
+   */
+  LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
+                         int offset, int check_value, LIR* target);
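+
+  // On x86 this can presumably be a single "cmp [base_reg + offset], imm"
+  // followed by a conditional jump, leaving temp_reg unused; architectures
+  // without a compare-with-memory form would first load into temp_reg.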
 
-    /*
-     * @brief Compare memory to immediate, and branch if condition true.
-     * @param cond The condition code that when true will branch to the target.
-     * @param temp_reg A temporary register that can be used if compare memory is not
-     * supported by the architecture.
-     * @param base_reg The register holding the base address.
-     * @param offset The offset from the base.
-     * @param check_value The immediate to compare to.
-     */
-    LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                           int offset, int check_value, LIR* target);
+  /*
+   * Can this operation use core registers without temporaries?
+   * @param rl_lhs Left hand operand.
+   * @param rl_rhs Right hand operand.
+   * @returns 'true' if the operation can proceed without needing temporary regs.
+   */
+  bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
 
-    /*
-     * Can this operation be using core registers without temporaries?
-     * @param rl_lhs Left hand operand.
-     * @param rl_rhs Right hand operand.
-     * @returns 'true' if the operation can proceed without needing temporary regs.
-     */
-    bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
+  /**
+   * @brief Generates inline code for conversion of long to FP by using x87.
+   * @param rl_dest The destination of the FP.
+   * @param rl_src The source of the long.
+   * @param is_double 'true' if dealing with double, 'false' for float.
+   */
+  virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
 
-    /**
-     * @brief Generates inline code for conversion of long to FP by using x87/
-     * @param rl_dest The destination of the FP.
-     * @param rl_src The source of the long.
-     * @param is_double 'true' if dealing with double, 'false' for float.
-     */
-    virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+  /*
+   * @brief Perform MIR analysis before compiling method.
+   * @note Invokes Mir2Lir::Materialize after analysis.
+   */
+  void Materialize();
 
-    /*
-     * @brief Perform MIR analysis before compiling method.
-     * @note Invokes Mir2LiR::Materialize after analysis.
-     */
-    void Materialize();
+  /*
+   * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
+   * without regard to data type.  In practice, this can result in UpdateLoc returning a
+   * location record for a Dalvik float value in a core register, and vice versa.  For targets
+   * which can inexpensively move data between core and float registers, this can often be a win.
+   * However, for x86 this is generally not a win.  These variants of UpdateLoc()
+   * take a register class argument - and will return an in-register location record only if
+   * the value is live in a temp register of the correct class.  Additionally, if the value is in
+   * a temp register of the wrong register class, it will be clobbered.
+   */
+  RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
+  RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
 
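Editor's note: the typed-update idea above can be pictured as a thin wrapper over UpdateLoc(); a minimal sketch, assuming helpers with the obvious semantics (RegClassMatches(), Clobber()), and not the committed implementation:

RegLocation UpdateLocTypedSketch(RegLocation loc, int reg_class) {
  loc = UpdateLoc(loc);  // type-agnostic: any live temp register matches
  if (loc.location == kLocPhysReg && !RegClassMatches(reg_class, loc.reg)) {
    Clobber(loc.reg);                // wrong class: invalidate the temp
    loc.location = kLocDalvikFrame;  // the caller will reload in-class
  }
  return loc;
}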
-    /*
-     * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
-     * without regard to data type.  In practice, this can result in UpdateLoc returning a
-     * location record for a Dalvik float value in a core register, and vis-versa.  For targets
-     * which can inexpensively move data between core and float registers, this can often be a win.
-     * However, for x86 this is generally not a win.  These variants of UpdateLoc()
-     * take a register class argument - and will return an in-register location record only if
-     * the value is live in a temp register of the correct class.  Additionally, if the value is in
-     * a temp register of the wrong register class, it will be clobbered.
-     */
-    RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
-    RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
+  /*
+   * @brief Analyze MIR before generating code, to prepare for the code generation.
+   */
+  void AnalyzeMIR();
 
-    /*
-     * @brief Analyze MIR before generating code, to prepare for the code generation.
-     */
-    void AnalyzeMIR();
+  /*
+   * @brief Analyze one basic block.
+   * @param bb Basic block to analyze.
+   */
+  void AnalyzeBB(BasicBlock * bb);
 
-    /*
-     * @brief Analyze one basic block.
-     * @param bb Basic block to analyze.
-     */
-    void AnalyzeBB(BasicBlock * bb);
+  /*
+   * @brief Analyze one extended MIR instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Extended instruction to analyze.
+   */
+  void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one extended MIR instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Extended instruction to analyze.
-     */
-    void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one MIR instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one MIR instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Instruction to analyze.
-     */
-    virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one MIR float/double instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
 
-    /*
-     * @brief Analyze one MIR float/double instruction
-     * @param opcode MIR instruction opcode.
-     * @param bb Basic block containing instruction.
-     * @param mir Instruction to analyze.
-     */
-    void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
+  /*
+   * @brief Analyze one use of a double operand.
+   * @param rl_use Double RegLocation for the operand.
+   */
+  void AnalyzeDoubleUse(RegLocation rl_use);
 
-    /*
-     * @brief Analyze one use of a double operand.
-     * @param rl_use Double RegLocation for the operand.
-     */
-    void AnalyzeDoubleUse(RegLocation rl_use);
+  bool Gen64Bit() const  { return gen64bit_; }
 
-    bool Gen64Bit() const  { return gen64bit_; }
+  // Information derived from analysis of MIR
 
-    // Information derived from analysis of MIR
+  // The compiler temporary for the code address of the method.
+  CompilerTemp *base_of_code_;
 
-    // The compiler temporary for the code address of the method.
-    CompilerTemp *base_of_code_;
+  // Have we decided to compute a ptr to code and store in temporary VR?
+  bool store_method_addr_;
 
-    // Have we decided to compute a ptr to code and store in temporary VR?
-    bool store_method_addr_;
+  // Have we used the stored method address?
+  bool store_method_addr_used_;
 
-    // Have we used the stored method address?
-    bool store_method_addr_used_;
+  // Instructions to remove if we didn't use the stored method address.
+  LIR* setup_method_address_[2];
 
-    // Instructions to remove if we didn't use the stored method address.
-    LIR* setup_method_address_[2];
+  // Instructions needing patching with Method* values.
+  GrowableArray<LIR*> method_address_insns_;
 
-    // Instructions needing patching with Method* values.
-    GrowableArray<LIR*> method_address_insns_;
+  // Instructions needing patching with Class Type* values.
+  GrowableArray<LIR*> class_type_address_insns_;
 
-    // Instructions needing patching with Class Type* values.
-    GrowableArray<LIR*> class_type_address_insns_;
+  // Instructions needing patching with PC relative code addresses.
+  GrowableArray<LIR*> call_method_insns_;
 
-    // Instructions needing patching with PC relative code addresses.
-    GrowableArray<LIR*> call_method_insns_;
+  // Prologue decrement of stack pointer.
+  LIR* stack_decrement_;
 
-    // Prologue decrement of stack pointer.
-    LIR* stack_decrement_;
+  // Epilogue increment of stack pointer.
+  LIR* stack_increment_;
 
-    // Epilogue increment of stack pointer.
-    LIR* stack_increment_;
+  // 64-bit mode
+  bool gen64bit_;
 
-    // 64-bit mode
-    bool gen64bit_;
+  // The list of const vector literals.
+  LIR *const_vectors_;
 
-    // The list of const vector literals.
-    LIR *const_vectors_;
+  /*
+   * @brief Search for a matching vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   * @returns pointer to matching LIR constant, or nullptr if not found.
+   */
+  LIR *ScanVectorLiteral(MIR *mir);
 
-    /*
-     * @brief Search for a matching vector literal
-     * @param mir A kMirOpConst128b MIR instruction to match.
-     * @returns pointer to matching LIR constant, or nullptr if not found.
-     */
-    LIR *ScanVectorLiteral(MIR *mir);
+  /*
+   * @brief Add a constant vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   */
+  LIR *AddVectorLiteral(MIR *mir);
 
-    /*
-     * @brief Add a constant vector literal
-     * @param mir A kMirOpConst128b MIR instruction to match.
-     */
-    LIR *AddVectorLiteral(MIR *mir);
+  InToRegStorageMapping in_to_reg_storage_mapping_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 0421a59..f6f0617 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -147,6 +147,9 @@
   // Update the in-register state of source.
   rl_src = UpdateLocWide(rl_src);
 
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   // If the source is in physical register, then put it in its location on stack.
   if (rl_src.location == kLocPhysReg) {
     RegisterInfo* reg_info = GetRegInfo(rl_src.reg);
@@ -191,15 +194,12 @@
      * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
      * correct register class.
      */
+    rl_result = EvalLoc(rl_dest, kFPReg, true);
     if (is_double) {
-      rl_result = EvalLocWide(rl_dest, kFPReg, true);
-
       LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
 
       StoreFinalValueWide(rl_dest, rl_result);
     } else {
-      rl_result = EvalLoc(rl_dest, kFPReg, true);
-
       Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
 
       StoreFinalValue(rl_dest, rl_result);
@@ -272,21 +272,67 @@
       return;
     }
     case Instruction::LONG_TO_DOUBLE:
+      if (Gen64Bit()) {
+        rcSrc = kCoreReg;
+        op = kX86Cvtsqi2sdRR;
+        break;
+      }
       GenLongToFP(rl_dest, rl_src, true /* is_double */);
       return;
     case Instruction::LONG_TO_FLOAT:
+      if (Gen64Bit()) {
+        rcSrc = kCoreReg;
+        op = kX86Cvtsqi2ssRR;
+        break;
+      }
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
-        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src);
+      if (Gen64Bit()) {
+        rl_src = LoadValue(rl_src, kFPReg);
+        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
+        ClobberSReg(rl_dest.s_reg_low);
+        rl_result = EvalLoc(rl_dest, kCoreReg, true);
+        RegStorage temp_reg = AllocTempSingle();
+
+        // Set 0x7fffffffffffffff to rl_result
+        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
+        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
+        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
+        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
+        branch_normal->target = NewLIR0(kPseudoTargetLabel);
+        StoreValueWide(rl_dest, rl_result);
       } else {
         GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
       }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
-        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src);
+      if (Gen64Bit()) {
+        rl_src = LoadValueWide(rl_src, kFPReg);
+        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
+        ClobberSReg(rl_dest.s_reg_low);
+        rl_result = EvalLoc(rl_dest, kCoreReg, true);
+        RegStorage temp_reg = AllocTempDouble();
+
+        // Set 0x7fffffffffffffff to rl_result
+        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
+        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
+        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
+        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
+        branch_normal->target = NewLIR0(kPseudoTargetLabel);
+        StoreValueWide(rl_dest, rl_result);
       } else {
         GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
       }
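Editor's note: the two inlined sequences above encode Java's narrowing rules without the pF2l/pD2l runtime call; a sketch of the semantics (not the emitted code):

#include <cstdint>
static int64_t FloatToLong(float f) {
  if (f != f) return 0;                               // NaN: the JP branch zeroes the result
  if (f >= 9223372036854775807.0f) return INT64_MAX;  // positive overflow clamps to MAX
  return static_cast<int64_t>(f);    // CVTTSS2SI truncates; negative overflow already
}                                    // yields 0x8000000000000000 == Long.MIN_VALUE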
@@ -335,7 +381,7 @@
     branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
   }
   // If the result reg can't be byte accessed, use a jump and move instead of a set.
-  if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) {
+  if (!IsByteRegister(rl_result.reg)) {
     LIR* branch2 = NULL;
     if (unordered_gt) {
       branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
@@ -434,9 +480,14 @@
 void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
   RegLocation rl_result;
   rl_src = LoadValueWide(rl_src, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
-  OpRegCopy(rl_result.reg, rl_src.reg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  if (Gen64Bit()) {
+    LoadConstantWide(rl_result.reg, 0x8000000000000000);
+    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
+  } else {
+    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
+    OpRegCopy(rl_result.reg, rl_src.reg);
+  }
   StoreValueWide(rl_dest, rl_result);
 }
 
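Editor's note: in the Gen64Bit() branch of GenNegDouble() above, adding 0x8000000000000000 flips exactly the sign bit: the carry out of bit 63 is discarded, so the ADD is equivalent to an XOR (a sketch of the arithmetic, not ART code):

static uint64_t NegDoubleBits(uint64_t bits) {
  return bits + 0x8000000000000000ULL;  // == bits ^ 0x8000000000000000ULL (mod 2^64)
}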
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 1cc16b9..05b5e43 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -31,6 +31,23 @@
  */
 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
+  if (Gen64Bit()) {
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegReg(kOpXor, rl_result.reg, rl_result.reg);  // result = 0
+    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe);  // result = (src1 != src2) ? 1 : result
+    RegStorage temp_reg = AllocTemp();
+    OpRegReg(kOpNeg, temp_reg, rl_result.reg);
+    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+    // result = (src1 < src2) ? -result : result
+    OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg);
+    StoreValue(rl_dest, rl_result);
+    FreeTemp(temp_reg);
+    return;
+  }
+
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
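Editor's note: the Gen64Bit() path above computes the cmp-long result branchlessly; restated in C++ (a sketch of the idiom, not the emitted code):

static int32_t CmpLong(int64_t a, int64_t b) {
  int32_t result = (a != b) ? 1 : 0;  // SET8ne after the first CMP
  int32_t neg = -result;              // NEG into a temp register
  return (a < b) ? neg : result;      // CMOVlt after the second CMP
}  // -1, 0, or 1, as cmp-long requires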
@@ -108,7 +125,7 @@
   }
   if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
-  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR,
+  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                     r_dest.GetReg(), r_src.GetReg());
   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
@@ -133,36 +150,51 @@
       } else {
         // TODO: Prevent this from happening in the code. The result is often
         // unused or could have been loaded more easily from memory.
-        NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
-        RegStorage r_tmp = AllocTempDouble();
-        NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
-        NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
-        FreeTemp(r_tmp);
+        if (!r_src.IsPair()) {
+          DCHECK(!r_dest.IsPair());
+          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
+        } else {
+          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
+          RegStorage r_tmp = AllocTempDouble();
+          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
+          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
+          FreeTemp(r_tmp);
+        }
       }
     } else {
       if (src_fp) {
-        NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
-        RegStorage temp_reg = AllocTempDouble();
-        NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
-        NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
-        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
-      } else {
-        DCHECK(r_dest.IsPair());
-        DCHECK(r_src.IsPair());
-        // Handle overlap
-        if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
-          // Deal with cycles.
-          RegStorage temp_reg = AllocTemp();
-          OpRegCopy(temp_reg, r_dest.GetHigh());
-          OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
-          OpRegCopy(r_dest.GetLow(), temp_reg);
-          FreeTemp(temp_reg);
-        } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        if (!r_dest.IsPair()) {
+          DCHECK(!r_src.IsPair());
+          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
         } else {
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
+          RegStorage temp_reg = AllocTempDouble();
+          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
+          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
+          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
+        }
+      } else {
+        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
+        if (!r_src.IsPair()) {
+          // Just copy the register directly.
+          OpRegCopy(r_dest, r_src);
+        } else {
+          // Handle overlap
+          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
+              r_src.GetLowReg() == r_dest.GetHighReg()) {
+            // Deal with cycles.
+            RegStorage temp_reg = AllocTemp();
+            OpRegCopy(temp_reg, r_dest.GetHigh());
+            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
+            OpRegCopy(r_dest.GetLow(), temp_reg);
+            FreeTemp(temp_reg);
+          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          } else {
+            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          }
         }
       }
     }
@@ -762,34 +794,61 @@
     RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
     LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
     LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
-    NewLIR1(kX86Push32R, rs_rDI.GetReg());
-    MarkTemp(rs_rDI);
-    LockTemp(rs_rDI);
-    NewLIR1(kX86Push32R, rs_rSI.GetReg());
-    MarkTemp(rs_rSI);
-    LockTemp(rs_rSI);
-    const int push_offset = 4 /* push edi */ + 4 /* push esi */;
-    int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0
-                : (IsInReg(this, rl_src_obj, rs_rDI) ? 4
-                : (SRegOffset(rl_src_obj.s_reg_low) + push_offset));
     // FIXME: needs 64-bit update.
-    LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI);
-    int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0
-                   : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
-                   : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
-    LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI);
-    NewLIR4(kX86LockCmpxchg8bA, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0);
+    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
+    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
+    DCHECK(!obj_in_si || !obj_in_di);
+    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
+    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
+    DCHECK(!off_in_si || !off_in_di);
+    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
+    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
+    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
+    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
+    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
+    if (push_di) {
+      NewLIR1(kX86Push32R, rs_rDI.GetReg());
+      MarkTemp(rs_rDI);
+      LockTemp(rs_rDI);
+    }
+    if (push_si) {
+      NewLIR1(kX86Push32R, rs_rSI.GetReg());
+      MarkTemp(rs_rSI);
+      LockTemp(rs_rSI);
+    }
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
+    if (!obj_in_si && !obj_in_di) {
+      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
+      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
+      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
+      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
+      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
+    }
+    if (!off_in_si && !off_in_di) {
+      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
+      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
+      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
+      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
+      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
+    }
+    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
     GenMemBarrier(kStoreLoad);
 
-    FreeTemp(rs_rSI);
-    UnmarkTemp(rs_rSI);
-    NewLIR1(kX86Pop32R, rs_rSI.GetReg());
-    FreeTemp(rs_rDI);
-    UnmarkTemp(rs_rDI);
-    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+    if (push_si) {
+      FreeTemp(rs_rSI);
+      UnmarkTemp(rs_rSI);
+      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+    }
+    if (push_di) {
+      FreeTemp(rs_rDI);
+      UnmarkTemp(rs_rDI);
+      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+    }
     FreeCallTemps();
   } else {
     // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
@@ -821,8 +880,17 @@
   // Convert ZF to boolean
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondZ);
-  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  RegStorage result_reg = rl_result.reg;
+
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (!IsByteRegister(result_reg)) {
+    result_reg = AllocateByteRegister();
+  }
+  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
+  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
+  if (IsTemp(result_reg)) {
+    FreeTemp(result_reg);
+  }
   StoreValue(rl_dest, rl_result);
   return true;
 }
@@ -832,18 +900,22 @@
 
   // Address the start of the method
   RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  LoadValueDirectFixed(rl_method, reg);
+  if (rl_method.wide) {
+    LoadValueDirectWideFixed(rl_method, reg);
+  } else {
+    LoadValueDirectFixed(rl_method, reg);
+  }
   store_method_addr_used_ = true;
 
   // Load the proper value from the literal area.
   // We don't know the proper offset for the value, so pick one that will force
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                     0, 0, target);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  SetMemRefType(res, true, kLiteral);
   store_method_addr_used_ = true;
   return res;
 }
@@ -871,18 +943,23 @@
 }
 
 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  DCHECK(reg.IsPair());  // TODO: allow 64BitSolo.
-  // We are not supposed to clobber the incoming storage, so allocate a temporary.
-  RegStorage t_reg = AllocTemp();
+  if (Gen64Bit()) {
+    DCHECK(reg.Is64Bit());
 
-  // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
-  OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
+    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
+  } else {
+    DCHECK(reg.IsPair());
+
+    // We are not supposed to clobber the incoming storage, so allocate a temporary.
+    RegStorage t_reg = AllocTemp();
+    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
+    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
+    // The temp is no longer needed so free it at this time.
+    FreeTemp(t_reg);
+  }
 
   // In case of zero, throw ArithmeticException.
   GenDivZeroCheck(kCondEq);
-
-  // The temp is no longer needed so free it at this time.
-  FreeTemp(t_reg);
 }
 
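Editor's note: the 32-bit pair path keeps the OR trick for the zero test; as a one-line equivalent (sketch):

static bool IsZeroWide(uint32_t lo, uint32_t hi) {
  return (lo | hi) == 0;  // a single OR sets ZF exactly when all 64 bits are zero
}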
 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
@@ -1026,6 +1103,9 @@
 }
 
 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   LIR *m;
   switch (val) {
     case 0:
@@ -1044,6 +1124,58 @@
 
 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
+  if (Gen64Bit()) {
+    if (rl_src1.is_const) {
+      std::swap(rl_src1, rl_src2);
+    }
+    // Are we multiplying by a constant?
+    if (rl_src2.is_const) {
+      int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+      if (val == 0) {
+        RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
+        StoreValueWide(rl_dest, rl_result);
+        return;
+      } else if (val == 1) {
+        StoreValueWide(rl_dest, rl_src1);
+        return;
+      } else if (val == 2) {
+        GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+        return;
+      } else if (IsPowerOfTwo(val)) {
+        int shift_amount = LowestSetBit(val);
+        if (!BadOverlap(rl_src1, rl_dest)) {
+          rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+          RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
+                                                    rl_src1, shift_amount);
+          StoreValueWide(rl_dest, rl_result);
+          return;
+        }
+      }
+    }
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
+        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
+      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
+               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
+      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
+    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
+               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
+      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    } else {
+      OpRegCopy(rl_result.reg, rl_src1.reg);
+      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    }
+    StoreValueWide(rl_dest, rl_result);
+    return;
+  }
+
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
   }
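Editor's note: the constant cases in the Gen64Bit() block above are classic strength reduction; in C++ terms (a sketch, with __builtin_ctzll standing in for LowestSetBit):

static int64_t MulByConst(int64_t x, uint64_t c) {
  if (c == 0) return 0;              // XOR reg,reg
  if (c == 1) return x;              // no code at all
  if (c == 2) return x + x;          // ADD is cheaper than IMUL
  if ((c & (c - 1)) == 0) {          // exactly one bit set: power of two
    return x << __builtin_ctzll(c);  // SHL by LowestSetBit(c)
  }
  return x * static_cast<int64_t>(c);  // general Imul64RR path
}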
@@ -1221,18 +1353,22 @@
   if (rl_src.location == kLocPhysReg) {
     // Both operands are in registers.
     // But we must ensure that rl_src is in pair
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
-      // The registers are the same, so we would clobber it before the use.
-      RegStorage temp_reg = AllocTemp();
-      OpRegCopy(temp_reg, rl_dest.reg);
-      rl_src.reg.SetHighReg(temp_reg.GetReg());
-    }
-    NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
+    if (Gen64Bit()) {
+      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
+    } else {
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+        // The registers are the same, so we would clobber it before the use.
+        RegStorage temp_reg = AllocTemp();
+        OpRegCopy(temp_reg, rl_dest.reg);
+        rl_src.reg.SetHighReg(temp_reg.GetReg());
+      }
+      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
 
-    x86op = GetOpcode(op, rl_dest, rl_src, true);
-    NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
-    FreeTemp(rl_src.reg);
+      x86op = GetOpcode(op, rl_dest, rl_src, true);
+      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
+      FreeTemp(rl_src.reg);  // ???
+    }
     return;
   }
 
@@ -1242,13 +1378,16 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_src.s_reg_low);
 
-  LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+  LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
+                     r_base, displacement + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
-  x86op = GetOpcode(op, rl_dest, rl_src, true);
-  lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
+  if (!Gen64Bit()) {
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
+  }
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
@@ -1273,17 +1412,21 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_dest.s_reg_low);
 
-  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg());
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
+                     Gen64Bit() ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           false /* is_load */, true /* is64bit */);
-  x86op = GetOpcode(op, rl_dest, rl_src, true);
-  lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          false /* is_load */, true /* is64bit */);
+  if (!Gen64Bit()) {
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            false /* is_load */, true /* is64bit */);
+  }
   FreeTemp(rl_src.reg);
 }
 
@@ -1330,23 +1473,44 @@
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
-    GenLongRegOrMemOp(rl_src1, rl_src2, op);
-  } else if (is_commutative) {
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-    // We need at least one of them to be a temporary.
-    if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
+  if (Gen64Bit()) {
+    if (IsTemp(rl_src1.reg)) {
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    } else if (is_commutative) {
+      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+      // We need at least one of them to be a temporary.
+      if (!IsTemp(rl_src2.reg)) {
+        rl_src1 = ForceTempWide(rl_src1);
+        GenLongRegOrMemOp(rl_src1, rl_src2, op);
+      } else {
+        GenLongRegOrMemOp(rl_src2, rl_src1, op);
+        StoreFinalValueWide(rl_dest, rl_src2);
+        return;
+      }
+    } else {
+      // Need LHS to be the temp.
       rl_src1 = ForceTempWide(rl_src1);
       GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    } else {
-      GenLongRegOrMemOp(rl_src2, rl_src1, op);
-      StoreFinalValueWide(rl_dest, rl_src2);
-      return;
     }
   } else {
-    // Need LHS to be the temp.
-    rl_src1 = ForceTempWide(rl_src1);
-    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    } else if (is_commutative) {
+      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+      // We need at least one of them to be a temporary.
+      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
+        rl_src1 = ForceTempWide(rl_src1);
+        GenLongRegOrMemOp(rl_src1, rl_src2, op);
+      } else {
+        GenLongRegOrMemOp(rl_src2, rl_src1, op);
+        StoreFinalValueWide(rl_dest, rl_src2);
+        return;
+      }
+    } else {
+      // Need LHS to be the temp.
+      rl_src1 = ForceTempWide(rl_src1);
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    }
   }
 
   StoreFinalValueWide(rl_dest, rl_src1);
@@ -1378,27 +1542,91 @@
 }
 
 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  LOG(FATAL) << "Unexpected use GenNotLong()";
+  if (Gen64Bit()) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result;
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegCopy(rl_result.reg, rl_src.reg);
+    OpReg(kOpNot, rl_result.reg);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    LOG(FATAL) << "Unexpected use GenNotLong()";
+  }
 }
 
 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2, bool is_div) {
-  LOG(FATAL) << "Unexpected use GenDivRemLong()";
+  if (!Gen64Bit()) {
+    LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+    return;
+  }
+
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Load LHS into RAX.
+  LoadValueDirectWideFixed(rl_src1, rs_r0q);
+
+  // Load RHS into RCX.
+  LoadValueDirectWideFixed(rl_src2, rs_r1q);
+
+  // Copy LHS sign bit into RDX.
+  NewLIR0(kx86Cqo64Da);
+
+  // Handle division by zero case.
+  GenDivZeroCheckWide(rs_r1q);
+
+  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
+  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
+  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // RHS is -1.
+  LoadConstantWide(rs_r3q, 0x8000000000000000);
+  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg());
+  LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // In 0x8000000000000000/-1 case.
+  if (!is_div) {
+    // For DIV, RAX is already right. For REM, we need RDX to be 0.
+    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
+  }
+  LIR* done = NewLIR1(kX86Jmp8, 0);
+
+  // Expected case.
+  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
+  minint_branch->target = minus_one_branch->target;
+  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
+  done->target = NewLIR0(kPseudoTargetLabel);
+
+  // Result is in RAX for div and RDX for rem.
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
+  if (!is_div) {
+    rl_result.reg.SetReg(r2q);
+  }
+
+  StoreValueWide(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = ForceTempWide(rl_src);
-  if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
-      ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
-    // The registers are the same, so we would clobber it before the use.
-    RegStorage temp_reg = AllocTemp();
-    OpRegCopy(temp_reg, rl_result.reg);
-    rl_result.reg.SetHighReg(temp_reg.GetReg());
+  RegLocation rl_result;
+  if (Gen64Bit()) {
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
+  } else {
+    rl_result = ForceTempWide(rl_src);
+    if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
+        ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
+      // The registers are the same, so we would clobber it before the use.
+      RegStorage temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_result.reg);
+      rl_result.reg.SetHighReg(temp_reg.GetReg());
+    }
+    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
+    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
+    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   }
-  OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
-  OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
-  OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
 
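Editor's note: both guards in GenDivRemLong() above exist because x86 IDIV raises #DE for the two divisors Java has to handle specially; the required semantics, as a sketch:

#include <cstdint>
static int64_t DivRemLong(int64_t a, int64_t b, bool is_div) {
  // b == 0 has already been rejected by GenDivZeroCheckWide().
  if (b == -1 && a == INT64_MIN) {
    return is_div ? a : 0;  // Java: MIN_VALUE / -1 == MIN_VALUE, remainder 0
  }
  return is_div ? a / b : a % b;  // one IDIV: quotient in RAX, remainder in RDX
}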
@@ -1531,8 +1759,7 @@
     rl_src = LoadValue(rl_src, reg_class);
   }
   // If the src reg can't be byte accessed, move it to a temp first.
-  if ((size == kSignedByte || size == kUnsignedByte) &&
-      rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
     RegStorage temp = AllocTemp();
     OpRegCopy(temp, rl_src.reg);
     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
@@ -1551,60 +1778,84 @@
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                           RegLocation rl_src, int shift_amount) {
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount);
-        NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
-      }
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
-      }
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
-        NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
+  if (Gen64Bit()) {
+    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
+    switch (opcode) {
+      case Instruction::SHL_LONG:
+      case Instruction::SHL_LONG_2ADDR:
+        op = kOpLsl;
+        break;
+      case Instruction::SHR_LONG:
+      case Instruction::SHR_LONG_2ADDR:
+        op = kOpAsr;
+        break;
+      case Instruction::USHR_LONG:
+      case Instruction::USHR_LONG_2ADDR:
+        op = kOpLsr;
+        break;
+      default:
+        LOG(FATAL) << "Unexpected case";
+    }
+    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
+  } else {
+    switch (opcode) {
+      case Instruction::SHL_LONG:
+      case Instruction::SHL_LONG_2ADDR:
+        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
+          LoadConstant(rl_result.reg.GetLow(), 0);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
+          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
+          LoadConstant(rl_result.reg.GetLow(), 0);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
+                  shift_amount);
+          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
+        }
+        break;
+      case Instruction::SHR_LONG:
+      case Instruction::SHR_LONG_2ADDR:
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
+                  shift_amount);
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
+        }
+        break;
+      case Instruction::USHR_LONG:
+      case Instruction::USHR_LONG_2ADDR:
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          LoadConstant(rl_result.reg.GetHigh(), 0);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
+          LoadConstant(rl_result.reg.GetHigh(), 0);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
+                  shift_amount);
+          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
+        }
+        break;
+      default:
+        LOG(FATAL) << "Unexpected case";
+    }
   }
   return rl_result;
 }
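Editor's note: the 32-bit pair cases above implement the usual double-width shift; for SHL the logic is (a sketch, valid for 0 < n < 64):

static void ShlPair(uint32_t* lo, uint32_t* hi, int n) {
  if (n >= 32) {                // the high half comes entirely from lo
    *hi = *lo << (n - 32);      // n == 32 degenerates to a plain copy
    *lo = 0;
  } else {                      // SHLD feeds lo's top bits into hi
    *hi = (*hi << n) | (*lo >> (32 - n));
    *lo <<= n;                  // then SAL finishes the low half
  }
}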
@@ -1634,24 +1885,26 @@
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  bool isConstSuccess = false;
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::AND_LONG:
     case Instruction::OR_LONG:
     case Instruction::XOR_LONG:
       if (rl_src2.is_const) {
-        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
       } else {
         DCHECK(rl_src1.is_const);
-        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
       }
       break;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (rl_src2.is_const) {
-        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
       } else {
         GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+        isConstSuccess = true;
       }
       break;
     case Instruction::ADD_LONG_2ADDR:
@@ -1660,20 +1913,24 @@
     case Instruction::AND_LONG_2ADDR:
       if (rl_src2.is_const) {
         if (GenerateTwoOperandInstructions()) {
-          GenLongImm(rl_dest, rl_src2, opcode);
+          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
         } else {
-          GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
         }
       } else {
         DCHECK(rl_src1.is_const);
-        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
       }
       break;
     default:
-      // Default - bail to non-const handler.
-      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      isConstSuccess = false;
       break;
   }
+
+  if (!isConstSuccess) {
+    // Default - bail to non-const handler.
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  }
 }
 
 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
@@ -1695,40 +1952,50 @@
                                 bool is_high_op) {
   bool rhs_in_mem = rhs.location != kLocPhysReg;
   bool dest_in_mem = dest.location != kLocPhysReg;
+  bool is64Bit = Gen64Bit();
   DCHECK(!rhs_in_mem || !dest_in_mem);
   switch (op) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (dest_in_mem) {
-        return is_high_op ? kX86Adc32MR : kX86Add32MR;
+        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
       } else if (rhs_in_mem) {
-        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
       }
-      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (dest_in_mem) {
-        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
+        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
       } else if (rhs_in_mem) {
-        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
       }
-      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (dest_in_mem) {
-        return kX86And32MR;
+        return is64Bit ? kX86And64MR : kX86And32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86And64RM : kX86And64RR;
       }
       return rhs_in_mem ? kX86And32RM : kX86And32RR;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (dest_in_mem) {
-        return kX86Or32MR;
+        return is64Bit ? kX86Or64MR : kX86Or32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
       }
       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (dest_in_mem) {
-        return kX86Xor32MR;
+        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
       }
       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
     default:
@@ -1740,6 +2007,7 @@
 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
                                 int32_t value) {
   bool in_mem = loc.location != kLocPhysReg;
+  bool is64Bit = Gen64Bit();
   bool byte_imm = IS_SIMM8(value);
   DCHECK(in_mem || !loc.reg.IsFloat());
   switch (op) {
@@ -1747,43 +2015,61 @@
     case Instruction::ADD_LONG_2ADDR:
       if (byte_imm) {
         if (in_mem) {
-          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
+          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
         }
-        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
       }
       if (in_mem) {
-        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
       }
-      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (byte_imm) {
         if (in_mem) {
-          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
+          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
         }
-        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
       }
       if (in_mem) {
-        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
       }
-      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86And64MI8 : kX86And64RI8;
+        }
         return in_mem ? kX86And32MI8 : kX86And32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86And64MI : kX86And64RI;
+      }
       return in_mem ? kX86And32MI : kX86And32RI;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
+        }
         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86Or64MI : kX86Or64RI;
+      }
       return in_mem ? kX86Or32MI : kX86Or32RI;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
+        }
         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86Xor64MI : kX86Xor64RI;
+      }
       return in_mem ? kX86Xor32MI : kX86Xor32RI;
     default:
       LOG(FATAL) << "Unexpected opcode: " << op;
@@ -1791,9 +2077,44 @@
   }
 }
 
-void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   DCHECK(rl_src.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
+
+  if (Gen64Bit()) {
+    // We can use the immediate form only if the value fits in 32 bits.
+    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
+      return false;
+    }
+
+    rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
+
+    if ((rl_dest.location == kLocDalvikFrame) ||
+        (rl_dest.location == kLocCompilerTemp)) {
+      int r_base = TargetReg(kSp).GetReg();
+      int displacement = SRegOffset(rl_dest.s_reg_low);
+
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
+      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, true /* is64bit */);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+      return true;
+    }
+
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    DCHECK_EQ(rl_result.location, kLocPhysReg);
+    DCHECK(!rl_result.reg.IsFloat());
+
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
+    NewLIR2(x86op, rl_result.reg.GetReg(), val);
+
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  }
+
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
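Editor's note: the fit test used by the Gen64Bit() paths here and in GenLongLongImm() below is the standard sign-extension round trip (sketch):

static bool FitsInImm32(int64_t v) {
  // -1 fits (it encodes as a sign-extended imm32); 0x80000000LL does not.
  return v == static_cast<int64_t>(static_cast<int32_t>(v));
}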
@@ -1804,6 +2125,7 @@
     int r_base = TargetReg(kSp).GetReg();
     int displacement = SRegOffset(rl_dest.s_reg_low);
 
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     if (!IsNoOp(op, val_lo)) {
       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
@@ -1820,7 +2142,7 @@
       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                                 false /* is_load */, true /* is64bit */);
     }
-    return;
+    return true;
   }
 
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
@@ -1836,12 +2158,38 @@
     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   }
   StoreValueWide(rl_dest, rl_result);
+  return true;
 }
 
-void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
                                 RegLocation rl_src2, Instruction::Code op) {
   DCHECK(rl_src2.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+
+  if (Gen64Bit()) {
+    // We can use the immediate form only if the value fits in 32 bits.
+    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
+      return false;
+    }
+    if (rl_dest.location == kLocPhysReg &&
+        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
+      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
+      StoreFinalValueWide(rl_dest, rl_dest);
+      return true;
+    }
+
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    // We need the value to be in a temporary register.
+    RegLocation rl_result = ForceTempWide(rl_src1);
+
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
+    NewLIR2(x86op, rl_result.reg.GetReg(), val);
+
+    StoreFinalValueWide(rl_dest, rl_result);
+    return true;
+  }
+
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
@@ -1861,7 +2209,7 @@
     }
 
     StoreFinalValueWide(rl_dest, rl_dest);
-    return;
+    return true;
   }
 
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
@@ -1879,6 +2227,7 @@
   }
 
   StoreFinalValueWide(rl_dest, rl_result);
+  return true;
 }
 
 // For final classes there are no sub-classes to check and so we can answer the instance-of
@@ -1889,17 +2238,21 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
-  // SETcc only works with EAX..EDX.
-  if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (result_reg == object.reg || !IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
-    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
 
   // Assume that there is no match.
   LoadConstant(result_reg, 0);
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kRefReg);
+  // We will use this register to compare to memory below.
+  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
+  // For this reason, force allocation of a 32 bit register to use, so that the
+  // compare to memory will be done using a 32 bit comparison.
+  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
+  RegStorage check_class = AllocTemp();
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
@@ -1999,7 +2352,7 @@
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
   RegLocation rl_result = GetReturn(kRefReg);
 
-  // SETcc only works with EAX..EDX.
+  // For 32-bit, SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
 
   // Is the class NULL?
@@ -2150,6 +2503,9 @@
     return;
   }
 
+  // If we generate any memory access below, it will reference a dalvik reg.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   if (unary) {
     rl_lhs = LoadValue(rl_lhs, kCoreReg);
     rl_result = UpdateLocTyped(rl_dest, kCoreReg);
@@ -2239,7 +2595,8 @@
             // We should be careful with the order here:
             // if rl_dest and rl_lhs point to the same VR, we should load first;
             // if they are different, we should find a register for dest first.
-            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
+            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
+                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
               rl_lhs = LoadValue(rl_lhs, kCoreReg);
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
               // No-op if these are the same.
@@ -2289,4 +2646,85 @@
   // Everything will be fine :-).
   return true;
 }
+
+void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
+  if (!Gen64Bit()) {
+    Mir2Lir::GenIntToLong(rl_dest, rl_src);
+    return;
+  }
+  rl_src = UpdateLoc(rl_src);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (rl_src.location == kLocPhysReg) {
+    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    int displacement = SRegOffset(rl_src.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
+                     displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
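For reference, the movsxd forms emitted above implement plain 32-to-64 sign extension; the semantics in portable C++:

#include <cstdint>
int64_t IntToLong(int32_t v) {
  return static_cast<int64_t>(v);  // bit 31 is copied into bits 32..63
}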
+
+void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                        RegLocation rl_src1, RegLocation rl_shift) {
+  if (!Gen64Bit()) {
+    Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+    return;
+  }
+
+  bool is_two_addr = false;
+  OpKind op = kOpBkpt;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::SHL_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHL_LONG:
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHR_LONG:
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::USHR_LONG:
+      op = kOpLsr;
+      break;
+    default:
+      op = kOpBkpt;
+  }
+
+  // X86 doesn't require masking and must use ECX.
+  RegStorage t_reg = TargetReg(kCount);  // rCX
+  LoadValueDirectFixed(rl_shift, t_reg);
+  if (is_two_addr) {
+    // Can we do this directly into memory?
+    rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
+    if (rl_result.location != kLocPhysReg) {
+      // Okay, we can do this into memory
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      OpMemReg(op, rl_result, t_reg.GetReg());
+    } else if (!rl_result.reg.IsFloat()) {
+      // Can do this directly into the result register
+      OpRegReg(op, rl_result.reg, t_reg);
+      StoreFinalValueWide(rl_dest, rl_result);
+    }
+  } else {
+    // Three address form, or we can't do it directly.
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
+    StoreFinalValueWide(rl_dest, rl_result);
+  }
+
+  FreeTemp(t_reg);
+}
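The "X86 doesn't require masking" comment refers to the hardware masking the shift count itself (to 6 bits for 64-bit operands), which already matches Dalvik's shl-long semantics; a sketch of what the emitted CL-count shift computes:

#include <cstdint>
uint64_t ShlLong64(uint64_t value, uint32_t count) {
  return value << (count & 63);  // x86-64 masks the CL count; no explicit AND is needed
}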
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 4d8fd1b..483d8cf 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -31,33 +31,25 @@
 };
 static constexpr RegStorage core_regs_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
-#ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
-#endif
 };
 static constexpr RegStorage core_regs_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
-#ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
-#endif
 };
 static constexpr RegStorage sp_regs_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
 static constexpr RegStorage sp_regs_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-#ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
-#endif
 };
 static constexpr RegStorage dp_regs_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-#ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
-#endif
 };
 static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
@@ -65,33 +57,25 @@
 static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
 static constexpr RegStorage core_temps_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
-#ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11
-#endif
 };
 static constexpr RegStorage core_temps_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
-#ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q
-#endif
 };
 static constexpr RegStorage sp_temps_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
 static constexpr RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-#ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
-#endif
 };
 static constexpr RegStorage dp_temps_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-#ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
-#endif
 };
 
 static constexpr RegStorage xp_temps_arr_32[] = {
@@ -99,9 +83,7 @@
 };
 static constexpr RegStorage xp_temps_arr_64[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-#ifdef TARGET_REX_SUPPORT
     rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
-#endif
 };
 
 static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -132,10 +114,16 @@
 X86NativeRegisterPool rX86_ARG1;
 X86NativeRegisterPool rX86_ARG2;
 X86NativeRegisterPool rX86_ARG3;
+X86NativeRegisterPool rX86_ARG4;
+X86NativeRegisterPool rX86_ARG5;
 X86NativeRegisterPool rX86_FARG0;
 X86NativeRegisterPool rX86_FARG1;
 X86NativeRegisterPool rX86_FARG2;
 X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_FARG4;
+X86NativeRegisterPool rX86_FARG5;
+X86NativeRegisterPool rX86_FARG6;
+X86NativeRegisterPool rX86_FARG7;
 X86NativeRegisterPool rX86_RET0;
 X86NativeRegisterPool rX86_RET1;
 X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -145,10 +133,16 @@
 RegStorage rs_rX86_ARG1;
 RegStorage rs_rX86_ARG2;
 RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_ARG4;
+RegStorage rs_rX86_ARG5;
 RegStorage rs_rX86_FARG0;
 RegStorage rs_rX86_FARG1;
 RegStorage rs_rX86_FARG2;
 RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_FARG4;
+RegStorage rs_rX86_FARG5;
+RegStorage rs_rX86_FARG6;
+RegStorage rs_rX86_FARG7;
 RegStorage rs_rX86_RET0;
 RegStorage rs_rX86_RET1;
 RegStorage rs_rX86_INVOKE_TGT;
@@ -164,7 +158,7 @@
 }
 
 RegLocation X86Mir2Lir::LocCReturnWide() {
-  return x86_loc_c_return_wide;
+  return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
 }
 
 RegLocation X86Mir2Lir::LocCReturnFloat() {
@@ -188,109 +182,94 @@
     case kArg1: res_reg = rs_rX86_ARG1; break;
     case kArg2: res_reg = rs_rX86_ARG2; break;
     case kArg3: res_reg = rs_rX86_ARG3; break;
+    case kArg4: res_reg = rs_rX86_ARG4; break;
+    case kArg5: res_reg = rs_rX86_ARG5; break;
     case kFArg0: res_reg = rs_rX86_FARG0; break;
     case kFArg1: res_reg = rs_rX86_FARG1; break;
     case kFArg2: res_reg = rs_rX86_FARG2; break;
     case kFArg3: res_reg = rs_rX86_FARG3; break;
+    case kFArg4: res_reg = rs_rX86_FARG4; break;
+    case kFArg5: res_reg = rs_rX86_FARG5; break;
+    case kFArg6: res_reg = rs_rX86_FARG6; break;
+    case kFArg7: res_reg = rs_rX86_FARG7; break;
     case kRet0: res_reg = rs_rX86_RET0; break;
     case kRet1: res_reg = rs_rX86_RET1; break;
     case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
     case kHiddenArg: res_reg = rs_rAX; break;
-    case kHiddenFpArg: res_reg = rs_fr0; break;
+    case kHiddenFpArg: DCHECK(!Gen64Bit()); res_reg = rs_fr0; break;
     case kCount: res_reg = rs_rX86_COUNT; break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
 
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  // TODO: This is not 64-bit compliant and depends on new internal ABI.
-  switch (arg_num) {
-    case 0:
-      return rs_rX86_ARG1;
-    case 1:
-      return rs_rX86_ARG2;
-    case 2:
-      return rs_rX86_ARG3;
-    default:
-      return RegStorage::InvalidReg();
-  }
-}
-
 /*
  * Decode the register id.
  */
-uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
-  uint64_t seed;
-  int shift;
-  int reg_id;
-
-  reg_id = reg.GetRegNum();
-  /* Double registers in x86 are just a single FP register */
-  seed = 1;
-  /* FP register starts at bit position 16 */
-  shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
-  /* Expand the double register id into single offset */
-  shift += reg_id;
-  return (seed << shift);
+ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
+  /* Double registers in x86 are just a single FP register. This is always just a single bit. */
+  return ResourceMask::Bit(
+      /* FP register starts at bit position 16 */
+      ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum());
 }
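A quick sketch of the single-bit layout this computes, per the inline comments (core registers occupy the low bit positions; FP/double registers start at kX86FPReg0, i.e. bit 16):

#include <cstdint>
uint64_t RegMaskBit(bool fp_or_wide, int reg_num) {
  return 1ULL << ((fp_or_wide ? 16 : 0) + reg_num);  // e.g. EAX -> bit 0, XMM0 -> bit 16
}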
 
-uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
+ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
   /*
    * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
    * able to clean up some of the x86/Arm_Mips differences
    */
   LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
-  return 0ULL;
+  return kEncodeNone;
 }
 
-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+                                          ResourceMask* use_mask, ResourceMask* def_mask) {
   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
   DCHECK(!lir->flags.use_def_invalid);
 
   // X86-specific resource map setup here.
   if (flags & REG_USE_SP) {
-    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
+    use_mask->SetBit(kX86RegSP);
   }
 
   if (flags & REG_DEF_SP) {
-    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
+    def_mask->SetBit(kX86RegSP);
   }
 
   if (flags & REG_DEFA) {
-    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
+    SetupRegMask(def_mask, rs_rAX.GetReg());
   }
 
   if (flags & REG_DEFD) {
-    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
+    SetupRegMask(def_mask, rs_rDX.GetReg());
   }
   if (flags & REG_USEA) {
-    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
+    SetupRegMask(use_mask, rs_rAX.GetReg());
   }
 
   if (flags & REG_USEC) {
-    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
+    SetupRegMask(use_mask, rs_rCX.GetReg());
   }
 
   if (flags & REG_USED) {
-    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
+    SetupRegMask(use_mask, rs_rDX.GetReg());
   }
 
   if (flags & REG_USEB) {
-    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
+    SetupRegMask(use_mask, rs_rBX.GetReg());
   }
 
   // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
   if (lir->opcode == kX86RepneScasw) {
-    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
-    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
-    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
-    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
+    SetupRegMask(use_mask, rs_rAX.GetReg());
+    SetupRegMask(use_mask, rs_rCX.GetReg());
+    SetupRegMask(use_mask, rs_rDI.GetReg());
+    SetupRegMask(def_mask, rs_rDI.GetReg());
   }
 
   if (flags & USE_FP_STACK) {
-    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
-    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
+    use_mask->SetBit(kX86FPStack);
+    def_mask->SetBit(kX86FPStack);
   }
 }
 
@@ -382,40 +361,40 @@
   return buf;
 }
 
-void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
+void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) {
   char buf[256];
   buf[0] = 0;
 
-  if (mask == ENCODE_ALL) {
+  if (mask.Equals(kEncodeAll)) {
     strcpy(buf, "all");
   } else {
     char num[8];
     int i;
 
     for (i = 0; i < kX86RegEnd; i++) {
-      if (mask & (1ULL << i)) {
+      if (mask.HasBit(i)) {
         snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
 
-    if (mask & ENCODE_CCODE) {
+    if (mask.HasBit(ResourceMask::kCCode)) {
       strcat(buf, "cc ");
     }
     /* Memory bits */
-    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
+    if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) {
       snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
                DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
                (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
     }
-    if (mask & ENCODE_LITERAL) {
+    if (mask.HasBit(ResourceMask::kLiteral)) {
       strcat(buf, "lit ");
     }
 
-    if (mask & ENCODE_HEAP_REF) {
+    if (mask.HasBit(ResourceMask::kHeapRef)) {
       strcat(buf, "heap ");
     }
-    if (mask & ENCODE_MUST_NOT_ALIAS) {
+    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
       strcat(buf, "noalias ");
     }
   }
@@ -445,7 +424,15 @@
 }
 
 RegStorage X86Mir2Lir::AllocateByteRegister() {
-  return AllocTypedTemp(false, kCoreReg);
+  RegStorage reg = AllocTypedTemp(false, kCoreReg);
+  if (!Gen64Bit()) {
+    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
+  }
+  return reg;
+}
+
+bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
+  return Gen64Bit() || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
 }
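The register-number check encodes a classic x86 constraint: without a REX prefix only AL/CL/DL/BL are byte-addressable, i.e. register numbers below ESP's (4); with REX (64-bit mode) every GPR has a byte form. A sketch of the 32-bit-mode rule:

bool HasByteFormWithoutRex(int reg_num) {
  return reg_num < 4;  // EAX, ECX, EDX, EBX only
}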
 
 /* Clobber all regs that might be used by an external C call */
@@ -482,6 +469,18 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
   LockTemp(rs_rX86_ARG3);
+  if (Gen64Bit()) {
+    LockTemp(rs_rX86_ARG4);
+    LockTemp(rs_rX86_ARG5);
+    LockTemp(rs_rX86_FARG0);
+    LockTemp(rs_rX86_FARG1);
+    LockTemp(rs_rX86_FARG2);
+    LockTemp(rs_rX86_FARG3);
+    LockTemp(rs_rX86_FARG4);
+    LockTemp(rs_rX86_FARG5);
+    LockTemp(rs_rX86_FARG6);
+    LockTemp(rs_rX86_FARG7);
+  }
 }
 
 /* To be used when explicitly managing register use */
@@ -490,14 +489,26 @@
   FreeTemp(rs_rX86_ARG1);
   FreeTemp(rs_rX86_ARG2);
   FreeTemp(rs_rX86_ARG3);
+  if (Gen64Bit()) {
+    FreeTemp(rs_rX86_ARG4);
+    FreeTemp(rs_rX86_ARG5);
+    FreeTemp(rs_rX86_FARG0);
+    FreeTemp(rs_rX86_FARG1);
+    FreeTemp(rs_rX86_FARG2);
+    FreeTemp(rs_rX86_FARG3);
+    FreeTemp(rs_rX86_FARG4);
+    FreeTemp(rs_rX86_FARG5);
+    FreeTemp(rs_rX86_FARG6);
+    FreeTemp(rs_rX86_FARG7);
+  }
 }
 
 bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
     switch (opcode) {
       case kX86LockCmpxchgMR:
       case kX86LockCmpxchgAR:
-      case kX86LockCmpxchg8bM:
-      case kX86LockCmpxchg8bA:
+      case kX86LockCmpxchg64M:
+      case kX86LockCmpxchg64A:
       case kX86XchgMR:
       case kX86Mfence:
         // Atomic memory instructions provide full barrier.
@@ -541,7 +552,7 @@
   } else {
     // Mark as a scheduling barrier.
     DCHECK(!mem_barrier->flags.use_def_invalid);
-    mem_barrier->u.m.def_mask = ENCODE_ALL;
+    mem_barrier->u.m.def_mask = &kEncodeAll;
   }
   return ret;
 #else
@@ -653,6 +664,14 @@
 }
 
 RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  // X86_64 can handle any size.
+  if (Gen64Bit()) {
+    if (size == kReference) {
+      return kRefReg;
+    }
+    return kCoreReg;
+  }
+
   if (UNLIKELY(is_volatile)) {
     // On x86, atomic 64-bit load/store requires an fp register.
     // Smaller aligned load/store is atomic for both core and fp registers.
@@ -688,11 +707,31 @@
     rs_rX86_ARG1 = rs_rSI;
     rs_rX86_ARG2 = rs_rDX;
     rs_rX86_ARG3 = rs_rCX;
+    rs_rX86_ARG4 = rs_r8;
+    rs_rX86_ARG5 = rs_r9;
+    rs_rX86_FARG0 = rs_fr0;
+    rs_rX86_FARG1 = rs_fr1;
+    rs_rX86_FARG2 = rs_fr2;
+    rs_rX86_FARG3 = rs_fr3;
+    rs_rX86_FARG4 = rs_fr4;
+    rs_rX86_FARG5 = rs_fr5;
+    rs_rX86_FARG6 = rs_fr6;
+    rs_rX86_FARG7 = rs_fr7;
     rX86_ARG0 = rDI;
     rX86_ARG1 = rSI;
     rX86_ARG2 = rDX;
     rX86_ARG3 = rCX;
-    // TODO: ARG4(r8), ARG5(r9), floating point args.
+    rX86_ARG4 = r8;
+    rX86_ARG5 = r9;
+    rX86_FARG0 = fr0;
+    rX86_FARG1 = fr1;
+    rX86_FARG2 = fr2;
+    rX86_FARG3 = fr3;
+    rX86_FARG4 = fr4;
+    rX86_FARG5 = fr5;
+    rX86_FARG6 = fr6;
+    rX86_FARG7 = fr7;
+    rs_rX86_INVOKE_TGT = rs_rDI;
   } else {
     rs_rX86_SP = rs_rX86_SP_32;
 
@@ -700,23 +739,32 @@
     rs_rX86_ARG1 = rs_rCX;
     rs_rX86_ARG2 = rs_rDX;
     rs_rX86_ARG3 = rs_rBX;
+    rs_rX86_ARG4 = RegStorage::InvalidReg();
+    rs_rX86_ARG5 = RegStorage::InvalidReg();
+    rs_rX86_FARG0 = rs_rAX;
+    rs_rX86_FARG1 = rs_rCX;
+    rs_rX86_FARG2 = rs_rDX;
+    rs_rX86_FARG3 = rs_rBX;
+    rs_rX86_FARG4 = RegStorage::InvalidReg();
+    rs_rX86_FARG5 = RegStorage::InvalidReg();
+    rs_rX86_FARG6 = RegStorage::InvalidReg();
+    rs_rX86_FARG7 = RegStorage::InvalidReg();
     rX86_ARG0 = rAX;
     rX86_ARG1 = rCX;
     rX86_ARG2 = rDX;
     rX86_ARG3 = rBX;
+    rX86_FARG0 = rAX;
+    rX86_FARG1 = rCX;
+    rX86_FARG2 = rDX;
+    rX86_FARG3 = rBX;
+    rs_rX86_INVOKE_TGT = rs_rAX;
+    // TODO(64): Initialize with invalid reg
+//    rX86_ARG4 = RegStorage::InvalidReg();
+//    rX86_ARG5 = RegStorage::InvalidReg();
   }
-  rs_rX86_FARG0 = rs_rAX;
-  rs_rX86_FARG1 = rs_rCX;
-  rs_rX86_FARG2 = rs_rDX;
-  rs_rX86_FARG3 = rs_rBX;
   rs_rX86_RET0 = rs_rAX;
   rs_rX86_RET1 = rs_rDX;
-  rs_rX86_INVOKE_TGT = rs_rAX;
   rs_rX86_COUNT = rs_rCX;
-  rX86_FARG0 = rAX;
-  rX86_FARG1 = rCX;
-  rX86_FARG2 = rDX;
-  rX86_FARG3 = rBX;
   rX86_RET0 = rAX;
   rX86_RET1 = rDX;
   rX86_INVOKE_TGT = rAX;
@@ -775,6 +823,7 @@
     int r_base = TargetReg(kSp).GetReg();
     int displacement = SRegOffset(rl_dest.s_reg_low);
 
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
     AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                               false /* is_load */, true /* is64bit */);
@@ -1062,7 +1111,10 @@
       } else {
         // Load the start index from stack, remembering that we pushed EDI.
         int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
-        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          Load32Disp(rs_rX86_SP, displacement, rs_rBX);
+        }
         OpRegReg(kOpXor, rs_rDI, rs_rDI);
         OpRegReg(kOpCmp, rs_rBX, rs_rDI);
         OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
@@ -1356,16 +1408,20 @@
 
   // Address the start of the method.
   RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  rl_method = LoadValue(rl_method, kCoreReg);
+  if (rl_method.wide) {
+    rl_method = LoadValueWide(rl_method, kCoreReg);
+  } else {
+    rl_method = LoadValue(rl_method, kCoreReg);
+  }
 
   // Load the proper value from the literal area.
   // We don't know the proper offset for the value, so pick one that will force
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(),  256 /* bogus */);
   load->flags.fixup = kFixupLoad;
   load->target = data_target;
-  SetMemRefType(load, true, kLiteral);
 }
 
 void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
@@ -1676,4 +1732,465 @@
   return new_value;
 }
 
+// ------------ ABI support: mapping of args to physical registers -------------
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+  const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3,
+                                                    rs_rX86_ARG4, rs_rX86_ARG5};
+  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
+                                                  rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
+  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (is_double_or_float) {
+    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
+      if (result.Valid()) {
+        result = is_wide ? RegStorage::FloatSolo64(result.GetReg())
+                         : RegStorage::FloatSolo32(result.GetReg());
+      }
+    }
+  } else {
+    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+      if (result.Valid()) {
+        result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg());
+      }
+    }
+  }
+  return result;
+}
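The mapper walks the core and FP argument-register pools independently, spilling to the stack once a pool is exhausted; a minimal standalone sketch of that walk (pool sizes taken from the arrays above):

#include <cstddef>
struct ArgMapperSketch {
  size_t next_core = 0, next_fp = 0;
  static constexpr size_t kCoreArgRegs = 5;  // ARG1..ARG5
  static constexpr size_t kFpArgRegs = 8;    // FARG0..FARG7
  // Returns the index into the corresponding pool, or -1 for a stack arg.
  int Next(bool is_fp) {
    size_t& cur = is_fp ? next_fp : next_core;
    size_t limit = is_fp ? kFpArgRegs : kCoreArgRegs;
    return cur < limit ? static_cast<int>(cur++) : -1;
  }
};

Note that a wide arg still consumes only one slot from its pool here; the two-slot in-position accounting happens in Initialize() below, where a 64-bit solo mapping makes the walk skip the next position.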
+
+RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+  DCHECK(IsInitialized());
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  max_mapped_in_ = -1;
+  is_there_stack_mapped_ = false;
+  for (int in_position = 0; in_position < count; in_position++) {
+     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+     if (reg.Valid()) {
+       mapping_[in_position] = reg;
+       max_mapped_in_ = std::max(max_mapped_in_, in_position);
+       if (reg.Is64BitSolo()) {
+         // We covered 2 args, so skip the next one
+         in_position++;
+       }
+     } else {
+       is_there_stack_mapped_ = true;
+     }
+  }
+  initialized_ = true;
+}
+
+RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!Gen64Bit()) {
+    return GetCoreArgMappingToPhysicalReg(arg_num);
+  }
+
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+    InToRegStorageX86_64Mapper mapper;
+    in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
+RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
+  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+  // Not used for 64-bit. TODO: move X86_32 to the same framework.
+  switch (core_arg_num) {
+    case 0:
+      return rs_rX86_ARG1;
+    case 1:
+      return rs_rX86_ARG2;
+    case 2:
+      return rs_rX86_ARG3;
+    default:
+      return RegStorage::InvalidReg();
+  }
+}
+
+// ---------End of ABI support: mapping of args to physical registers -------------
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+  if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method);
+  /*
+   * Dummy up a RegLocation for the incoming Method*
+   * It will attempt to keep kArg0 live (or copy it to home location
+   * if promoted).
+   */
+
+  RegLocation rl_src = rl_method;
+  rl_src.location = kLocPhysReg;
+  rl_src.reg = TargetReg(kArg0);
+  rl_src.home = false;
+  MarkLive(rl_src);
+  StoreValue(rl_method, rl_src);
+  // If Method* has been promoted, explicitly flush
+  if (rl_method.location == kLocPhysReg) {
+    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+  }
+
+  if (cu_->num_ins == 0) {
+    return;
+  }
+
+  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+  /*
+   * Copy incoming arguments to their proper home locations.
+   * NOTE: an older version of dx had an issue in which
+   * it would reuse static method argument registers.
+   * This could result in the same Dalvik virtual register
+   * being promoted to both core and fp regs. To account for this,
+   * we only copy to the corresponding promoted physical register
+   * if it matches the type of the SSA name for the incoming
+   * argument.  It is also possible that long and double arguments
+   * end up half-promoted.  In those cases, we must flush the promoted
+   * half to memory as well.
+   */
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegStorage reg = RegStorage::InvalidReg();
+    // get reg corresponding to input
+    reg = GetArgMappingToPhysicalReg(i);
+
+    if (reg.Valid()) {
+      // If arriving in register
+      bool need_flush = true;
+      RegLocation* t_loc = &ArgLocs[i];
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+        need_flush = false;
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+        need_flush = false;
+      } else {
+        need_flush = true;
+      }
+
+      // For wide args, force flush if not fully promoted
+      if (t_loc->wide) {
+        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+        // Is only half promoted?
+        need_flush |= (p_map->core_location != v_map->core_location) ||
+            (p_map->fp_location != v_map->fp_location);
+      }
+      if (need_flush) {
+        if (t_loc->wide && t_loc->fp) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+          // Increment i to skip the next one
+          i++;
+        } else if (t_loc->wide && !t_loc->fp) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+          // Increment i to skip the next one
+          i++;
+        } else {
+          Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
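The half-promotion rule above can be read in isolation: a wide argument must be flushed unless both halves of the pair were promoted the same way. A sketch using the PromotionMap fields referenced above (the type itself is defined outside this diff):

// p_map points at the other half of the pair (v_map +/- 1, as above).
bool NeedFlushWide(const PromotionMap* v_map, const PromotionMap* p_map) {
  return p_map->core_location != v_map->core_location ||
         p_map->fp_location != v_map->fp_location;
}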
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer.  Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+                                  const MethodReference& target_method,
+                                  uint32_t vtable_idx, uintptr_t direct_code,
+                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsNoRange(info,
+                                  call_state, pcrLabel, next_call_insn,
+                                  target_method,
+                                  vtable_idx, direct_code,
+                                  direct_method, type, skip_this);
+  }
+  return GenDalvikArgsRange(info,
+                       call_state, pcrLabel, next_call_insn,
+                       target_method,
+                       vtable_idx, direct_code,
+                       direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo).  Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying.  This
+ * applies to arg3 and above (see below).
+ *
+ * Two general strategies:
+ *    If < 20 arguments
+ *       Pass args 3-18 using a block copy (xmm moves on x86)
+ *       Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *    If 20+ arguments
+ *       Pass args arg19+ using memcpy block copy
+ *       Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *
+ */
+int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+                                LIR** pcrLabel, NextCallInsn next_call_insn,
+                                const MethodReference& target_method,
+                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+                                InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsRange(info, call_state,
+                                pcrLabel, next_call_insn,
+                                target_method,
+                                vtable_idx, direct_code, direct_method,
+                                type, skip_this);
+  }
+
+  /* If no arguments, just return */
+  if (info->num_arg_words == 0)
+    return call_state;
+
+  const int start_index = skip_this ? 1 : 0;
+
+  InToRegStorageX86_64Mapper mapper;
+  InToRegStorageMapping in_to_reg_storage_mapping;
+  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+  // First of all, check whether it makes sense to use bulk copying.
+  // The optimization is applicable only to the range case.
+  // TODO: make a constant instead of 2
+  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+    // Scan the rest of the args - if in phys_reg flush to memory
+    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+      RegLocation loc = info->args[next_arg];
+      if (loc.wide) {
+        loc = UpdateLocWide(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+        }
+        next_arg += 2;
+      } else {
+        loc = UpdateLoc(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+        }
+        next_arg++;
+      }
+    }
+
+    // Logic below assumes that Method pointer is at offset zero from SP.
+    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+    // The rest can be copied together
+    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    // Only dalvik regs are accessed in this loop; no next_call_insn() calls.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are exactly 4 registers left to move,
+       * do a 128-bit move, because we won't get another chance to align. If there are more
+       * than 4 registers left to move, consider a 128-bit move only if either src or dest is
+       * already aligned, because a smaller move could still be used to reach alignment first.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp. Since we are working through the calling sequence,
+        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
+        // there are no free registers.
+        RegStorage temp = AllocTempDouble();
+
+        LIR* ld1 = nullptr;
+        LIR* ld2 = nullptr;
+        LIR* st1 = nullptr;
+        LIR* st2 = nullptr;
+
+        /*
+         * The logic is similar for both loads and stores. If we have 16-byte alignment,
+         * do an aligned move. If we have 8-byte alignment, then do the move in two
+         * parts. This approach prevents possible cache line splits. Finally, fall back
+         * to doing an unaligned move. In most cases we likely won't split the cache
+         * line but we cannot prove it and thus take a conservative approach.
+         */
+        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+        if (src_is_16b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+        } else if (src_is_8b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+                            kMovHi128FP);
+        } else {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+        }
+
+        if (dest_is_16b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+        } else if (dest_is_8b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+                            temp, kMovHi128FP);
+        } else {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+        }
+
+        // TODO If we could keep track of aliasing information for memory accesses that are wider
+        // than 64-bit, we wouldn't need to set up a barrier.
+        if (ld1 != nullptr) {
+          if (ld2 != nullptr) {
+            // For 64-bit load we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+          } else {
+            // Set barrier for 128-bit load.
+            ld1->u.m.def_mask = &kEncodeAll;
+          }
+        }
+        if (st1 != nullptr) {
+          if (st2 != nullptr) {
+            // For 64-bit store we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+          } else {
+            // Set barrier for 128-bit store.
+            st1->u.m.def_mask = &kEncodeAll;
+          }
+        }
+
+        // Free the temporary used for the data movement.
+        FreeTemp(temp);
+      } else {
+        // Moving 32-bits via general purpose register.
+        bytes_to_move = sizeof(uint32_t);
+
+        // Instead of allocating a new temp, simply reuse one of the registers being used
+        // for argument passing.
+        RegStorage temp = TargetReg(kArg3);
+
+        // Now load the argument VR and store to the outs.
+        Load32Disp(TargetReg(kSp), current_src_offset, temp);
+        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+  }
+
+  // Now handle the remaining arguments that were not mapped to registers.
+  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+    RegStorage regSingle = TargetReg(kArg2);
+    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.wide) {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+            } else {
+              LoadValueDirectWideFixed(rl_arg, regWide);
+              StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+            }
+            i++;
+          } else {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+            } else {
+              LoadValueDirectFixed(rl_arg, regSingle);
+              StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+            }
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+    }
+  }
+
+  // Finish with mapped registers
+  for (int i = start_index; i <= last_mapped_in; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        LoadValueDirectWideFixed(rl_arg, reg);
+        i++;
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                               direct_code, direct_method, type);
+    }
+  }
+
+  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                           direct_code, direct_method, type);
+  if (pcrLabel) {
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      Load32Disp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
+  }
+  return call_state;
+}
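The width decision inside the bulk-copy loop distills to a small predicate; a standalone sketch (4-byte arg slots assumed, as in the code above):

#include <cstddef>
size_t PickMoveBytes(int regs_left, int src_offset, int dest_offset) {
  bool src16 = (src_offset & 0xF) == 0;
  bool dest16 = (dest_offset & 0xF) == 0;
  if (regs_left == 4 || (regs_left > 4 && (src16 || dest16))) {
    return 16;  // one 128-bit xmm move covers four slots
  }
  return 4;     // fall back to a single 32-bit GPR move
}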
+
 }  // namespace art
+
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 618b3a5..b93e3e8 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -89,11 +89,8 @@
     res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg());
   } else {
     // Note, there is no byte immediate form of a 32 bit immediate move.
-    if (r_dest.Is64Bit()) {
-      res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value);
-    } else {
-      res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
-    }
+    // A 64-bit immediate is not supported by the LIR structure.
+    res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
   }
 
   if (r_dest_save.IsFloat()) {
@@ -120,8 +117,8 @@
 LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
   X86OpCode opcode = kX86Bkpt;
   switch (op) {
-    case kOpNeg: opcode = kX86Neg32R; break;
-    case kOpNot: opcode = kX86Not32R; break;
+    case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break;
+    case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break;
     case kOpRev: opcode = kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
@@ -138,6 +135,10 @@
     switch (op) {
       case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
       case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
+      case kOpLsl: opcode = kX86Sal64RI; break;
+      case kOpLsr: opcode = kX86Shr64RI; break;
+      case kOpAsr: opcode = kX86Sar64RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp64RI8 : kX86Cmp64RI; break;
       default:
         LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
     }
@@ -189,6 +190,7 @@
 }
 
 LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
+    bool is64Bit = r_dest_src1.Is64Bit();
     X86OpCode opcode = kX86Nop;
     bool src2_must_be_cx = false;
     switch (op) {
@@ -207,33 +209,34 @@
         OpReg(kOpRev, r_dest_src1);
         return OpRegImm(kOpAsr, r_dest_src1, 16);
         // X86 binary opcodes
-      case kOpSub: opcode = kX86Sub32RR; break;
-      case kOpSbc: opcode = kX86Sbb32RR; break;
-      case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break;
-      case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break;
-      case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break;
-      case kOpMov: opcode = kX86Mov32RR; break;
-      case kOpCmp: opcode = kX86Cmp32RR; break;
-      case kOpAdd: opcode = kX86Add32RR; break;
-      case kOpAdc: opcode = kX86Adc32RR; break;
-      case kOpAnd: opcode = kX86And32RR; break;
-      case kOpOr:  opcode = kX86Or32RR; break;
-      case kOpXor: opcode = kX86Xor32RR; break;
+      case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break;
+      case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break;
+      case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break;
+      case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break;
+      case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break;
+      case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break;
+      case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break;
+      case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break;
+      case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break;
+      case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break;
+      case kOpOr:  opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break;
+      case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break;
       case kOp2Byte:
         // TODO: there are several instances of this check.  A utility function perhaps?
         // TODO: Similar to Arm's reg < 8 check.  Perhaps add attribute checks to RegStorage?
         // Use shifts instead of a byte operand if the source can't be byte accessed.
         if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
-          NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
-          NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24);
-          return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24);
+          NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
+          NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24);
+          return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(),
+                         is64Bit ? 56 : 24);
         } else {
-          opcode = kX86Movsx8RR;
+          opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR;
         }
         break;
-      case kOp2Short: opcode = kX86Movsx16RR; break;
-      case kOp2Char: opcode = kX86Movzx16RR; break;
-      case kOpMul: opcode = kX86Imul32RR; break;
+      case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break;
+      case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break;
+      case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break;
       default:
         LOG(FATAL) << "Bad case in OpRegReg " << op;
         break;
@@ -354,16 +357,17 @@
 }
 
 LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
+  bool is64Bit = r_dest.Is64Bit();
   X86OpCode opcode = kX86Nop;
   switch (op) {
       // X86 binary opcodes
-    case kOpSub: opcode = kX86Sub32RM; break;
-    case kOpMov: opcode = kX86Mov32RM; break;
-    case kOpCmp: opcode = kX86Cmp32RM; break;
-    case kOpAdd: opcode = kX86Add32RM; break;
-    case kOpAnd: opcode = kX86And32RM; break;
-    case kOpOr:  opcode = kX86Or32RM; break;
-    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
     case kOp2Byte: opcode = kX86Movsx8RM; break;
     case kOp2Short: opcode = kX86Movsx16RM; break;
     case kOp2Char: opcode = kX86Movzx16RM; break;
@@ -373,7 +377,8 @@
       break;
   }
   LIR *l = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), offset);
-  if (r_base == rs_rX86_SP) {
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    DCHECK(r_base == rs_rX86_SP);
     AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */);
   }
   return l;
@@ -382,63 +387,72 @@
 LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
   DCHECK_NE(rl_dest.location, kLocPhysReg);
   int displacement = SRegOffset(rl_dest.s_reg_low);
+  bool is64Bit = rl_dest.wide != 0;
   X86OpCode opcode = kX86Nop;
   switch (op) {
-    case kOpSub: opcode = kX86Sub32MR; break;
-    case kOpMov: opcode = kX86Mov32MR; break;
-    case kOpCmp: opcode = kX86Cmp32MR; break;
-    case kOpAdd: opcode = kX86Add32MR; break;
-    case kOpAnd: opcode = kX86And32MR; break;
-    case kOpOr:  opcode = kX86Or32MR; break;
-    case kOpXor: opcode = kX86Xor32MR; break;
-    case kOpLsl: opcode = kX86Sal32MC; break;
-    case kOpLsr: opcode = kX86Shr32MC; break;
-    case kOpAsr: opcode = kX86Sar32MC; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break;
+    case kOpLsl: opcode = is64Bit ? kX86Sal64MC : kX86Sal32MC; break;
+    case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break;
+    case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break;
     default:
       LOG(FATAL) << "Bad case in OpMemReg " << op;
       break;
   }
   LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value);
-  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
-  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
+    AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */);
+  }
   return l;
 }
 
 LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) {
   DCHECK_NE(rl_value.location, kLocPhysReg);
+  bool is64Bit = r_dest.Is64Bit();
   int displacement = SRegOffset(rl_value.s_reg_low);
   X86OpCode opcode = kX86Nop;
   switch (op) {
-    case kOpSub: opcode = kX86Sub32RM; break;
-    case kOpMov: opcode = kX86Mov32RM; break;
-    case kOpCmp: opcode = kX86Cmp32RM; break;
-    case kOpAdd: opcode = kX86Add32RM; break;
-    case kOpAnd: opcode = kX86And32RM; break;
-    case kOpOr:  opcode = kX86Or32RM; break;
-    case kOpXor: opcode = kX86Xor32RM; break;
-    case kOpMul: opcode = kX86Imul32RM; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
+    case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break;
     default:
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
   LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement);
-  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
+  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+    AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
+  }
   return l;
 }
 
 LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1,
                              RegStorage r_src2) {
+  bool is64Bit = r_dest.Is64Bit();
   if (r_dest != r_src1 && r_dest != r_src2) {
     if (op == kOpAdd) {  // lea special case, except can't encode rbp as base
       if (r_src1 == r_src2) {
         OpRegCopy(r_dest, r_src1);
         return OpRegImm(kOpLsl, r_dest, 1);
       } else if (r_src1 != rs_rBP) {
-        return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src1.GetReg() /* base */,
-                       r_src2.GetReg() /* index */, 0 /* scale */, 0 /* disp */);
+        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                       r_src1.GetReg() /* base */, r_src2.GetReg() /* index */,
+                       0 /* scale */, 0 /* disp */);
       } else {
-        return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src2.GetReg() /* base */,
-                       r_src1.GetReg() /* index */, 0 /* scale */, 0 /* disp */);
+        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                       r_src2.GetReg() /* base */, r_src1.GetReg() /* index */,
+                       0 /* scale */, 0 /* disp */);
       }
     } else {
       OpRegCopy(r_dest, r_src1);
@@ -476,10 +490,10 @@
 }
 
 LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) {
-  if (op == kOpMul) {
+  if (op == kOpMul && !Gen64Bit()) {
     X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
     return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value);
-  } else if (op == kOpAnd) {
+  } else if (op == kOpAnd && !Gen64Bit()) {
     if (value == 0xFF && r_src.Low4()) {
       return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg());
     } else if (value == 0xFFFF) {
@@ -492,8 +506,9 @@
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
-      return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */,
-                     rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */);
+      return NewLIR5(r_dest.Is64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                     r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */,
+                     0 /* scale */, value /* disp */);
     }
     OpRegCopy(r_dest, r_src);
   }
@@ -556,17 +571,21 @@
 
         // Address the start of the method
         RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-        rl_method = LoadValue(rl_method, kCoreReg);
+        if (rl_method.wide) {
+          rl_method = LoadValueWide(rl_method, kCoreReg);
+        } else {
+          rl_method = LoadValue(rl_method, kCoreReg);
+        }
 
         // Load the proper value from the literal area.
         // We don't know the proper offset for the value, so pick one that will force
         // 4 byte offset.  We will fix this up in the assembler later to have the right
         // value.
+        ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
         res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
                            kDouble);
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
-        SetMemRefType(res, true, kLiteral);
         store_method_addr_used_ = true;
       } else {
         if (val_lo == 0) {
@@ -582,8 +601,20 @@
         }
       }
     } else {
-      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
-      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
+      if (r_dest.IsPair()) {
+        res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
+        LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
+      } else {
+        // TODO(64) make int64_t value parameter of LoadConstantNoClobber
+        if (val_lo < 0) {
+          val_hi += 1;
+        }
+        res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi);
+        NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32);
+        if (val_lo != 0) {
+          NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo);
+        }
+      }
     }
     return res;
 }
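The non-pair path above materializes a 64-bit constant as mov/sal/add, and the val_hi pre-increment compensates for the ADD sign-extending its 32-bit immediate (a negative low half borrows 2^32 from the high half). A worked sketch of the arithmetic:

#include <cstdint>
uint64_t Materialize64(int32_t val_hi, int32_t val_lo) {
  uint32_t hi = static_cast<uint32_t>(val_hi);
  if (val_lo < 0) {
    hi += 1;  // pre-compensate the borrow introduced by the sign-extended add
  }
  uint64_t r = static_cast<uint64_t>(hi) << 32;  // mov r32, hi ; sal r64, 32
  r += static_cast<int64_t>(val_lo);             // add r64, imm32 (sign-extended)
  return r;  // == (uint64_t(uint32_t(val_hi)) << 32) | uint32_t(val_lo)
}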
@@ -601,6 +632,8 @@
     case kDouble:
       if (r_dest.IsFloat()) {
         opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
+      } else if (!pair) {
+        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
       } else {
         opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
       }
@@ -657,7 +690,8 @@
                         displacement + HIWORD_OFFSET);
       }
     }
-    if (r_base == rs_rX86_SP) {
+    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+      DCHECK(r_base == rs_rX86_SP);
       AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
                               true /* is_load */, is64bit);
       if (pair) {
@@ -742,13 +776,10 @@
     case kDouble:
       if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
+      } else if (!pair) {
+        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
       } else {
-        if (Gen64Bit()) {
-          opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
-        } else {
-          // TODO(64): pair = true;
-          opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
-        }
+        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
@@ -791,7 +822,8 @@
       store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetLowReg());
       store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src.GetHighReg());
     }
-    if (r_base == rs_rX86_SP) {
+    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
+      DCHECK(r_base == rs_rX86_SP);
       AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
                               false /* is_load */, is64bit);
       if (pair) {
@@ -855,7 +887,7 @@
 
   // Did we need a pointer to the method code?
   if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit());
   } else {
     base_of_code_ = nullptr;
   }
@@ -971,6 +1003,7 @@
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
 
@@ -984,7 +1017,7 @@
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
-
 }  // namespace art
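
The non-pair branch added to LoadConstantWide above builds a 64-bit constant from two 32-bit halves: a 32-bit move of val_hi, a shift left by 32, then an add of val_lo. The x86-64 add sign-extends its 32-bit immediate, so a negative val_lo borrows one from the high word; the val_hi pre-increment compensates. A standalone check of that arithmetic (plain C++, not ART code):

#include <cassert>
#include <cstdint>

// Mirrors the mov/sal/add sequence; arithmetic is done in uint64_t to match
// two's-complement hardware behavior.
uint64_t MaterializeWide(int32_t val_hi, int32_t val_lo) {
  int64_t hi = val_hi;
  if (val_lo < 0) {
    hi += 1;  // Compensate for the borrow caused by the sign-extended add below.
  }
  uint64_t reg = static_cast<uint32_t>(hi);                      // mov r, val_hi
  reg <<= 32;                                                    // sal r, 32
  if (val_lo != 0) {
    reg += static_cast<uint64_t>(static_cast<int64_t>(val_lo));  // add r, imm32
  }
  return reg;
}

int main() {
  const int64_t values[] = {INT64_C(0x123456789), INT64_C(-0x123456789), INT64_C(-1)};
  for (int64_t value : values) {
    int32_t lo = static_cast<int32_t>(value);
    int32_t hi = static_cast<int32_t>(value >> 32);
    assert(MaterializeWide(hi, lo) == static_cast<uint64_t>(value));
  }
  return 0;
}
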
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index bb8df89..f1b5811 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -111,9 +111,6 @@
   kX86RegEnd   = kX86FPStack,
 };
 
-#define ENCODE_X86_REG_SP           (1ULL << kX86RegSP)
-#define ENCODE_X86_FP_STACK         (1ULL << kX86FPStack)
-
 // FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
 enum X86NativeRegisterPool {
   r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
@@ -142,10 +139,6 @@
   r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
   r7q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
   rDI            = r7,
-#ifndef TARGET_REX_SUPPORT
-  // fake return address register for core spill mask.
-  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
-#else
   r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
   r8q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
   r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
@@ -164,7 +157,6 @@
   r15q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
   // fake return address register for core spill mask.
   rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
-#endif
 
   // xmm registers, single precision view.
   fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
@@ -175,7 +167,6 @@
   fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
   fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
   fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
-#ifdef TARGET_REX_SUPPORT
   fr8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
   fr9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
   fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
@@ -184,7 +175,6 @@
   fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
   fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
   fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
 
   // xmm registers, double precision aliases.
   dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
@@ -195,7 +185,6 @@
   dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
   dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
   dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
-#ifdef TARGET_REX_SUPPORT
   dr8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
   dr9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
   dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
@@ -204,7 +193,6 @@
   dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
   dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
   dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
 
   // xmm registers, quad precision aliases
   xr0  = RegStorage::k128BitSolo | 0,
@@ -215,7 +203,6 @@
   xr5  = RegStorage::k128BitSolo | 5,
   xr6  = RegStorage::k128BitSolo | 6,
   xr7  = RegStorage::k128BitSolo | 7,
-#ifdef TARGET_REX_SUPPORT
   xr8  = RegStorage::k128BitSolo | 8,
   xr9  = RegStorage::k128BitSolo | 9,
   xr10 = RegStorage::k128BitSolo | 10,
@@ -224,7 +211,6 @@
   xr13 = RegStorage::k128BitSolo | 13,
   xr14 = RegStorage::k128BitSolo | 14,
   xr15 = RegStorage::k128BitSolo | 15,
-#endif
 
   // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };
@@ -254,7 +240,6 @@
 constexpr RegStorage rs_r7q(RegStorage::kValid | r7q);
 constexpr RegStorage rs_rDI = rs_r7;
 constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_r8(RegStorage::kValid | r8);
 constexpr RegStorage rs_r8q(RegStorage::kValid | r8q);
 constexpr RegStorage rs_r9(RegStorage::kValid | r9);
@@ -271,7 +256,6 @@
 constexpr RegStorage rs_r14q(RegStorage::kValid | r14q);
 constexpr RegStorage rs_r15(RegStorage::kValid | r15);
 constexpr RegStorage rs_r15q(RegStorage::kValid | r15q);
-#endif
 
 constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
 constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
@@ -281,7 +265,6 @@
 constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
 constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
 constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
 constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
 constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
@@ -290,7 +273,6 @@
 constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
 constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
 constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-#endif
 
 constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
 constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
@@ -300,7 +282,6 @@
 constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
 constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
 constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
 constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
 constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
@@ -309,7 +290,6 @@
 constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
 constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
 constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-#endif
 
 constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
 constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
@@ -319,7 +299,6 @@
 constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
 constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
 constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
-#ifdef TARGET_REX_SUPPORT
 constexpr RegStorage rs_xr8(RegStorage::kValid | xr8);
 constexpr RegStorage rs_xr9(RegStorage::kValid | xr9);
 constexpr RegStorage rs_xr10(RegStorage::kValid | xr10);
@@ -328,16 +307,21 @@
 constexpr RegStorage rs_xr13(RegStorage::kValid | xr13);
 constexpr RegStorage rs_xr14(RegStorage::kValid | xr14);
 constexpr RegStorage rs_xr15(RegStorage::kValid | xr15);
-#endif
 
 extern X86NativeRegisterPool rX86_ARG0;
 extern X86NativeRegisterPool rX86_ARG1;
 extern X86NativeRegisterPool rX86_ARG2;
 extern X86NativeRegisterPool rX86_ARG3;
+extern X86NativeRegisterPool rX86_ARG4;
+extern X86NativeRegisterPool rX86_ARG5;
 extern X86NativeRegisterPool rX86_FARG0;
 extern X86NativeRegisterPool rX86_FARG1;
 extern X86NativeRegisterPool rX86_FARG2;
 extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_FARG4;
+extern X86NativeRegisterPool rX86_FARG5;
+extern X86NativeRegisterPool rX86_FARG6;
+extern X86NativeRegisterPool rX86_FARG7;
 extern X86NativeRegisterPool rX86_RET0;
 extern X86NativeRegisterPool rX86_RET1;
 extern X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -347,10 +331,16 @@
 extern RegStorage rs_rX86_ARG1;
 extern RegStorage rs_rX86_ARG2;
 extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_ARG4;
+extern RegStorage rs_rX86_ARG5;
 extern RegStorage rs_rX86_FARG0;
 extern RegStorage rs_rX86_FARG1;
 extern RegStorage rs_rX86_FARG2;
 extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_FARG4;
+extern RegStorage rs_rX86_FARG5;
+extern RegStorage rs_rX86_FARG6;
+extern RegStorage rs_rX86_FARG7;
 extern RegStorage rs_rX86_RET0;
 extern RegStorage rs_rX86_RET1;
 extern RegStorage rs_rX86_INVOKE_TGT;
@@ -363,6 +353,9 @@
 const RegLocation x86_loc_c_return_wide
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
+const RegLocation x86_64_loc_c_return_wide
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
 const RegLocation x86_loc_c_return_float
     {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
@@ -505,6 +498,7 @@
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
   kx86Cdq32Da,
+  kx86Cqo64Da,
   kX86Bswap32R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
@@ -518,8 +512,12 @@
   kX86MovssAR,
   Binary0fOpCode(kX86Cvtsi2sd),  // int to double
   Binary0fOpCode(kX86Cvtsi2ss),  // int to float
+  Binary0fOpCode(kX86Cvtsqi2sd),  // long to double
+  Binary0fOpCode(kX86Cvtsqi2ss),  // long to float
   Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
   Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
+  Binary0fOpCode(kX86Cvttsd2sqi),  // truncating double to long
+  Binary0fOpCode(kX86Cvttss2sqi),  // truncating float to long
   Binary0fOpCode(kX86Cvtsd2si),  // rounding double to int
   Binary0fOpCode(kX86Cvtss2si),  // rounding float to int
   Binary0fOpCode(kX86Ucomisd),  // unordered double compare
@@ -587,14 +585,18 @@
   kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
   kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
   Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
+  Binary0fOpCode(kX86Movqxr),   // move into xmm from 64 bit gpr
+  kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR,  // move into 64 bit reg from xmm
   kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
+  kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA,  // move 32 bit to 64 bit with sign extension
   kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
   kX86Mfence,                   // memory barrier
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
+  Binary0fOpCode(kX86Imul64),   // 64bit multiply
   kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
   kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
-  kX86LockCmpxchg8bM, kX86LockCmpxchg8bA,  // locked compare and exchange
+  kX86LockCmpxchg64M, kX86LockCmpxchg64A,  // locked compare and exchange
   kX86XchgMR,  // exchange memory with register (automatically locked)
   Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
   Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
@@ -628,7 +630,6 @@
   kData,                                    // Special case for raw data.
   kNop,                                     // Special case for variable length nop.
   kNullary,                                 // Opcode that takes no arguments.
-  kPrefix2Nullary,                          // Opcode that takes no arguments, but 2 prefixes.
   kRegOpcode,                               // Shorter form of R instruction kind (opcode+rd)
   kReg, kMem, kArray,                       // R, M and A instruction kinds.
   kMemReg, kArrayReg, kThreadReg,           // MR, AR and TR instruction kinds.
@@ -637,11 +638,11 @@
   kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,     // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                               // Shorter form move RI.
-  kRegRegImmRev,                            // RRI with first reg in r/m
+  kRegRegImmStore,                          // RRI following the store modrm reg-reg encoding rather than the load.
   kMemRegImm,                               // MRI instruction kinds.
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
-  kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
+  // kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
   kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
   kRegRegCond,                             // RR instruction kind followed by a condition.
   kRegMemCond,                             // RM instruction kind followed by a condition.
@@ -654,19 +655,25 @@
 /* Struct used to define the EncodingMap positions for each X86 opcode */
 struct X86EncodingMap {
   X86OpCode opcode;      // e.g. kOpAddRI
-  X86EncodingKind kind;  // Used to discriminate in the union below
+  // The broad category the instruction conforms to, such as kRegReg. Identifies which LIR operands
+  // hold meaning for the opcode.
+  X86EncodingKind kind;
   uint64_t flags;
   struct {
-  uint8_t prefix1;       // non-zero => a prefix byte
-  uint8_t prefix2;       // non-zero => a second prefix byte
-  uint8_t opcode;        // 1 byte opcode
-  uint8_t extra_opcode1;  // possible extra opcode byte
-  uint8_t extra_opcode2;  // possible second extra opcode byte
-  // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
-  // encoding kind
+  uint8_t prefix1;       // Non-zero => a prefix byte.
+  uint8_t prefix2;       // Non-zero => a second prefix byte.
+  uint8_t opcode;        // 1 byte opcode.
+  uint8_t extra_opcode1;  // Possible extra opcode byte.
+  uint8_t extra_opcode2;  // Possible second extra opcode byte.
+  // 3-bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
+  // encoding kind.
   uint8_t modrm_opcode;
-  uint8_t ax_opcode;  // non-zero => shorter encoding for AX as a destination
-  uint8_t immediate_bytes;  // number of bytes of immediate
+  uint8_t ax_opcode;  // Non-zero => shorter encoding for AX as a destination.
+  uint8_t immediate_bytes;  // Number of bytes of immediate.
+  // Does the instruction address a byte register? In 32-bit mode the registers ah, bh, ch and dh
+  // are not used. In 64-bit mode the REX prefix is used to normalize and allow any byte register
+  // to be addressed.
+  bool r8_form;
   } skeleton;
   const char *name;
   const char* fmt;
@@ -700,6 +707,7 @@
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
 #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
+#define IS_SIMM32(v) ((INT64_C(-2147483648) <= (v)) && ((v) <= INT64_C(2147483647)))
 
 extern X86EncodingMap EncodingMap[kX86Last];
 extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
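
For context on the new r8_form flag in the encoding skeleton: the quirk it tracks is a fixed property of x86 instruction encoding. Without a REX prefix, byte-register codes 4-7 in the modrm byte name ah/ch/dh/bh; the presence of any REX prefix switches them to spl/bpl/sil/dil, which is what makes every byte register addressable in 64-bit mode. A small standalone table for illustration (register names are architectural, not ART identifiers):

#include <cstdio>

int main() {
  const char* no_rex[8]   = {"al", "cl", "dl", "bl", "ah",  "ch",  "dh",  "bh"};
  const char* with_rex[8] = {"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil"};
  for (int code = 0; code < 8; ++code) {
    std::printf("modrm reg %d: %-3s (no REX) -> %s (with REX)\n",
                code, no_rex[code], with_rex[code]);
  }
  return 0;
}
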
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 7e50c31..3b891f2 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEX_REG_STORAGE_H_
 #define ART_COMPILER_DEX_REG_STORAGE_H_
 
+#include "base/logging.h"
 
 namespace art {
 
@@ -102,17 +103,21 @@
   static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift);
 
   // Reg is [F][LLLLL], will override any existing shape and use rs_kind.
-  RegStorage(RegStorageKind rs_kind, int reg) {
-    DCHECK_NE(rs_kind, k64BitPair);
-    DCHECK_EQ(rs_kind & ~kShapeMask, 0);
-    reg_ = kValid | rs_kind | (reg & kRegTypeMask);
+  constexpr RegStorage(RegStorageKind rs_kind, int reg)
+      : reg_(
+          DCHECK_CONSTEXPR(rs_kind != k64BitPair, , 0u)
+          DCHECK_CONSTEXPR((rs_kind & ~kShapeMask) == 0, , 0u)
+          kValid | rs_kind | (reg & kRegTypeMask)) {
   }
-  RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) {
-    DCHECK_EQ(rs_kind, k64BitPair);
-    DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint);
-    DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31";
-    reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
-        (low_reg & kRegTypeMask);
+  constexpr RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg)
+      : reg_(
+          DCHECK_CONSTEXPR(rs_kind == k64BitPair, << rs_kind, 0u)
+          DCHECK_CONSTEXPR((low_reg & kFloatingPoint) == (high_reg & kFloatingPoint),
+                           << low_reg << ", " << high_reg, 0u)
+          DCHECK_CONSTEXPR((high_reg & kRegNumMask) <= kHighRegNumMask,
+                           << "High reg must be in 0..31: " << high_reg, false)
+          kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
+                  (low_reg & kRegTypeMask)) {
   }
   constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
   RegStorage() : reg_(kInvalid) {}
@@ -125,50 +130,53 @@
     return (reg_ != rhs.GetRawBits());
   }
 
-  bool Valid() const {
+  constexpr bool Valid() const {
     return ((reg_ & kValidMask) == kValid);
   }
 
-  bool Is32Bit() const {
+  constexpr bool Is32Bit() const {
     return ((reg_ & kShapeMask) == k32BitSolo);
   }
 
-  bool Is64Bit() const {
+  constexpr bool Is64Bit() const {
     return ((reg_ & k64BitMask) == k64Bits);
   }
 
-  bool Is64BitSolo() const {
+  constexpr bool Is64BitSolo() const {
     return ((reg_ & kShapeMask) == k64BitSolo);
   }
 
-  bool IsPair() const {
+  constexpr bool IsPair() const {
     return ((reg_ & kShapeMask) == k64BitPair);
   }
 
-  bool IsFloat() const {
-    DCHECK(Valid());
-    return ((reg_ & kFloatingPoint) == kFloatingPoint);
+  constexpr bool IsFloat() const {
+    return
+        DCHECK_CONSTEXPR(Valid(), , false)
+        ((reg_ & kFloatingPoint) == kFloatingPoint);
   }
 
-  bool IsDouble() const {
-    DCHECK(Valid());
-    return (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
+  constexpr bool IsDouble() const {
+    return
+        DCHECK_CONSTEXPR(Valid(), , false)
+        (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
   }
 
-  bool IsSingle() const {
-    DCHECK(Valid());
-    return (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
+  constexpr bool IsSingle() const {
+    return
+        DCHECK_CONSTEXPR(Valid(), , false)
+        (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
   }
 
-  static bool IsFloat(uint16_t reg) {
+  static constexpr bool IsFloat(uint16_t reg) {
     return ((reg & kFloatingPoint) == kFloatingPoint);
   }
 
-  static bool IsDouble(uint16_t reg) {
+  static constexpr bool IsDouble(uint16_t reg) {
     return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
   }
 
-  static bool IsSingle(uint16_t reg) {
+  static constexpr bool IsSingle(uint16_t reg) {
     return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
   }
 
@@ -221,17 +229,17 @@
   }
 
   // Return the register number of low or solo.
-  int GetRegNum() const {
+  constexpr int GetRegNum() const {
     return reg_ & kRegNumMask;
   }
 
   // Is register number in 0..7?
-  bool Low8() const {
+  constexpr bool Low8() const {
     return GetRegNum() < 8;
   }
 
   // Is register number in 0..3?
-  bool Low4() const {
+  constexpr bool Low4() const {
     return GetRegNum() < 4;
   }
 
@@ -244,11 +252,11 @@
     return RegStorage(k64BitPair, low.GetReg(), high.GetReg());
   }
 
-  static bool SameRegType(RegStorage reg1, RegStorage reg2) {
+  static constexpr bool SameRegType(RegStorage reg1, RegStorage reg2) {
     return (reg1.IsDouble() == reg2.IsDouble()) && (reg1.IsSingle() == reg2.IsSingle());
   }
 
-  static bool SameRegType(int reg1, int reg2) {
+  static constexpr bool SameRegType(int reg1, int reg2) {
     return (IsDouble(reg1) == IsDouble(reg2)) && (IsSingle(reg1) == IsSingle(reg2));
   }
 
@@ -258,17 +266,17 @@
   }
 
   // Create a floating point 32-bit solo.
-  static RegStorage FloatSolo32(int reg_num) {
+  static constexpr RegStorage FloatSolo32(int reg_num) {
     return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
   }
 
   // Create a 128-bit solo.
-  static RegStorage Solo128(int reg_num) {
+  static constexpr RegStorage Solo128(int reg_num) {
     return RegStorage(k128BitSolo, reg_num & kRegTypeMask);
   }
 
   // Create a 64-bit solo.
-  static RegStorage Solo64(int reg_num) {
+  static constexpr RegStorage Solo64(int reg_num) {
     return RegStorage(k64BitSolo, reg_num & kRegTypeMask);
   }
 
@@ -277,19 +285,19 @@
     return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
   }
 
-  static RegStorage InvalidReg() {
+  static constexpr RegStorage InvalidReg() {
     return RegStorage(kInvalid);
   }
 
-  static uint16_t RegNum(int raw_reg_bits) {
+  static constexpr uint16_t RegNum(int raw_reg_bits) {
     return raw_reg_bits & kRegNumMask;
   }
 
-  int GetRawBits() const {
+  constexpr int GetRawBits() const {
     return reg_;
   }
 
-  size_t StorageSize() {
+  size_t StorageSize() const {
     switch (reg_ & kShapeMask) {
       case kInvalid: return 0;
       case k32BitSolo: return 4;
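
The DCHECK_CONSTEXPR uses above come from art's base/logging.h; the macro expands to the head of a ternary, so a debug check can be woven into the single return expression a C++11 constexpr function allows. A minimal reconstruction of the idea (the macro body here is an assumption, not the real definition):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// On failure, take the non-constexpr branch: report, abort, and yield a dummy
// value of the right type so the ternary still type-checks.
static bool ConstexprCheckFailed(const char* msg) {
  std::fprintf(stderr, "check failed: %s\n", msg);
  std::abort();
}

#define DCHECK_CONSTEXPR_SKETCH(cond, dummy) \
  (!(cond)) ? (ConstexprCheckFailed(#cond), (dummy)) :

struct Reg {
  uint16_t reg_;
  constexpr bool Valid() const { return (reg_ & 0x8000) != 0; }
  constexpr bool IsFloat() const {
    // At compile time with a valid register, only the untaken ternary branch
    // mentions the non-constexpr failure path, so evaluation stays constant.
    return DCHECK_CONSTEXPR_SKETCH(Valid(), false)
        ((reg_ & 0x0200) == 0x0200);
  }
};

static_assert(Reg{0x8200}.IsFloat(), "float bit set on a valid register");

int main() { return 0; }
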
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 4324325..e26745a 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -117,6 +117,16 @@
   RecordDFSOrders(GetEntryBlock());
 
   num_reachable_blocks_ = dfs_order_->Size();
+
+  if (num_reachable_blocks_ != num_blocks_) {
+    // Hide all unreachable blocks.
+    AllNodesIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      if (!bb->visited) {
+        bb->Hide(cu_);
+      }
+    }
+  }
 }
 
 /*
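
The new block at the end of ComputeDFSOrders above hides every basic block the depth-first walk never reached, so later passes only see the reachable graph. A toy standalone version of that sweep (simplified graph types, not MIRGraph):

#include <cstdio>
#include <vector>

struct Block {
  std::vector<int> successors;
  bool visited;
  bool hidden;
};

// Mark blocks reachable from the entry, then hide the rest, mirroring the
// visited-flag sweep added above.
static void Dfs(std::vector<Block>& blocks, int id) {
  if (blocks[id].visited) return;
  blocks[id].visited = true;
  for (int succ : blocks[id].successors) Dfs(blocks, succ);
}

int main() {
  // Block 2 has no predecessor on any path from the entry block 0.
  std::vector<Block> blocks = {{{1}, false, false}, {{}, false, false}, {{1}, false, false}};
  Dfs(blocks, 0);
  for (size_t i = 0; i < blocks.size(); ++i) {
    if (!blocks[i].visited) {
      blocks[i].hidden = true;
      std::printf("hiding unreachable block %zu\n", i);
    }
  }
  return 0;
}
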
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index c4af9cb..db383c4 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -123,6 +123,16 @@
  */
 bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) {
   SSARepresentation *ssa_rep = mir->ssa_rep;
+
+  /*
+   * The dex bytecode definition does not explicitly outlaw using the same virtual
+   * register in both a 32-bit and a 64-bit pair context.  However, dx does not generate
+   * this pattern (at least not recently), and the next revision of dex will forbid it
+   * outright.  To support the few cases in the wild, detect this pattern and punt to
+   * the interpreter.
+   */
+  bool type_mismatch = false;
+
   if (ssa_rep) {
     uint64_t attrs = GetDataFlowAttributes(mir);
     const int* uses = ssa_rep->uses;
@@ -145,6 +155,7 @@
       }
     }
 
+
     // Handles uses
     int next = 0;
     if (attrs & DF_UA) {
@@ -162,6 +173,7 @@
         SRegToVReg(uses[next + 1]));
         next += 2;
       } else {
+        type_mismatch |= reg_location_[uses[next]].wide;
         next++;
       }
     }
@@ -180,6 +192,7 @@
                              SRegToVReg(uses[next + 1]));
         next += 2;
       } else {
+        type_mismatch |= reg_location_[uses[next]].wide;
         next++;
       }
     }
@@ -196,6 +209,8 @@
         reg_location_[uses[next + 1]].high_word = true;
         DCHECK_EQ(SRegToVReg(uses[next])+1,
         SRegToVReg(uses[next + 1]));
+      } else {
+        type_mismatch |= reg_location_[uses[next]].wide;
       }
     }
 
@@ -205,6 +220,7 @@
         (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) {
       switch (cu_->shorty[0]) {
           case 'I':
+            type_mismatch |= reg_location_[uses[0]].wide;
             changed |= SetCore(uses[0]);
             break;
           case 'J':
@@ -215,6 +231,7 @@
             reg_location_[uses[1]].high_word = true;
             break;
           case 'F':
+            type_mismatch |= reg_location_[uses[0]].wide;
             changed |= SetFp(uses[0]);
             break;
           case 'D':
@@ -225,6 +242,7 @@
             reg_location_[uses[1]].high_word = true;
             break;
           case 'L':
+            type_mismatch |= reg_location_[uses[0]].wide;
             changed |= SetRef(uses[0]);
             break;
           default: break;
@@ -261,6 +279,7 @@
           (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) {
         reg_location_[uses[next]].defined = true;
         reg_location_[uses[next]].ref = true;
+        type_mismatch |= reg_location_[uses[next]].wide;
         next++;
       }
       uint32_t cpos = 1;
@@ -286,12 +305,15 @@
               i++;
               break;
             case 'F':
+              type_mismatch |= reg_location_[uses[i]].wide;
               ssa_rep->fp_use[i] = true;
               break;
             case 'L':
+              type_mismatch |= reg_location_[uses[i]].wide;
               changed |= SetRef(uses[i]);
               break;
             default:
+              type_mismatch |= reg_location_[uses[i]].wide;
               changed |= SetCore(uses[i]);
               break;
           }
@@ -367,6 +389,12 @@
       }
     }
   }
+  if (type_mismatch) {
+    LOG(WARNING) << "Deprecated dex type mismatch, interpreting "
+                 << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+    LOG(INFO) << "@ 0x" << std::hex << mir->offset;
+    SetPuntToInterpreter(true);
+  }
   return changed;
 }
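
A standalone sketch of the detection threaded through InferTypeAndSize above (minimal invented types): each 32-bit use checks whether the SSA register was already inferred as wide, and any conflict folds into the single type_mismatch flag that triggers the punt.

#include <cstdio>
#include <vector>

struct RegLocation { bool wide; };

int main() {
  // s_reg 1 was defined as part of a wide (64-bit) pair.
  std::vector<RegLocation> reg_location = {{false}, {true}, {false}};
  std::vector<int> narrow_uses = {0, 1};  // 32-bit uses, by s_reg number.

  bool type_mismatch = false;
  for (int s_reg : narrow_uses) {
    type_mismatch |= reg_location[s_reg].wide;
  }
  if (type_mismatch) {
    std::puts("Deprecated dex type mismatch, punting to the interpreter");
  }
  return type_mismatch ? 1 : 0;
}
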
 
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 45abfcc..324f717 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -135,8 +135,10 @@
       } else {
         // Search dex file for localized ssb index, may fail if field's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
+        StackHandleScope<1> hs(Thread::Current());
         const DexFile::StringId* string_id =
-            dex_file->FindStringId(FieldHelper(resolved_field).GetDeclaringClassDescriptor());
+            dex_file->FindStringId(
+                FieldHelper(hs.NewHandle(resolved_field)).GetDeclaringClassDescriptor());
         if (string_id != nullptr) {
           const DexFile::TypeId* type_id =
              dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
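
The StackHandleScope added above matters because FieldHelper now reads the field through a Handle: if a garbage collection moves the object, the collector can update the handle slot, whereas a raw pointer captured beforehand would be left stale. A toy model of that fix-up (invented types; the real Handle/StackHandleScope machinery lives in ART's runtime):

#include <cassert>

struct Object { int data; };

// A registered handle is a slot the collector knows about; raw pointers are not.
struct ToyHandleScope {
  Object** slot_ = nullptr;
  Object** NewHandle(Object** slot) { slot_ = slot; return slot; }
  void MoveObject(Object* from, Object* to) {
    *to = *from;                          // Relocate the object, as a moving GC would.
    if (slot_ != nullptr && *slot_ == from) {
      *slot_ = to;                        // Fix up the registered handle only.
    }
  }
};

int main() {
  Object old_space{42}, new_space{0};
  Object* handle_ref = &old_space;
  Object* raw_ref = &old_space;

  ToyHandleScope hs;
  hs.NewHandle(&handle_ref);
  hs.MoveObject(&old_space, &new_space);

  assert(handle_ref == &new_space);  // Updated through the scope.
  assert(raw_ref == &old_space);     // Stale: the bug handles prevent.
  return 0;
}
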
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1cfd194..3e326f0 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -560,9 +560,8 @@
                     soa.AddLocalReference<jobject>(method->GetDeclaringClass()->GetClassLoader()));
     jclass_loader = soa.Env()->NewGlobalRef(local_class_loader.get());
     // Find the dex_file
-    MethodHelper mh(method);
-    dex_file = &mh.GetDexFile();
-    class_def_idx = mh.GetClassDefIndex();
+    dex_file = method->GetDexFile();
+    class_def_idx = method->GetClassDefIndex();
   }
   const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   self->TransitionFromRunnableToSuspended(kNative);
@@ -630,7 +629,7 @@
 static void ResolveExceptionsForMethod(MethodHelper* mh,
     std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = mh->GetCodeItem();
+  const DexFile::CodeItem* code_item = mh->GetMethod()->GetCodeItem();
   if (code_item == NULL) {
     return;  // native or abstract method
   }
@@ -650,10 +649,10 @@
       uint16_t encoded_catch_handler_handlers_type_idx =
           DecodeUnsignedLeb128(&encoded_catch_handler_list);
       // Add to set of types to resolve if not already in the dex cache resolved types
-      if (!mh->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
+      if (!mh->GetMethod()->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
         exceptions_to_resolve.insert(
             std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx,
-                                                &mh->GetDexFile()));
+                                                mh->GetMethod()->GetDexFile()));
       }
       // ignore address associated with catch handler
       DecodeUnsignedLeb128(&encoded_catch_handler_list);
@@ -669,15 +668,14 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve =
       reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
-  MethodHelper mh;
+  StackHandleScope<1> hs(Thread::Current());
+  MethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
   for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
-    mirror::ArtMethod* m = c->GetVirtualMethod(i);
-    mh.ChangeMethod(m);
+    mh.ChangeMethod(c->GetVirtualMethod(i));
     ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
   }
   for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
-    mirror::ArtMethod* m = c->GetDirectMethod(i);
-    mh.ChangeMethod(m);
+    mh.ChangeMethod(c->GetDirectMethod(i));
     ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
   }
   return true;
@@ -1117,8 +1115,7 @@
     *stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot;
   }
   if (!use_dex_cache && compiling_boot) {
-    MethodHelper mh(method);
-    if (!IsImageClass(mh.GetDeclaringClassDescriptor())) {
+    if (!IsImageClass(method->GetDeclaringClassDescriptor())) {
       // We can only branch directly to Methods that are resolved in the DexCache.
       // Otherwise we won't invoke the resolution trampoline.
       use_dex_cache = true;
@@ -1131,8 +1128,10 @@
     target_method->dex_method_index = method->GetDexMethodIndex();
   } else {
     if (no_guarantee_of_dex_cache_entry) {
+      StackHandleScope<1> hs(Thread::Current());
+      MethodHelper mh(hs.NewHandle(method));
       // See if the method is also declared in this dex cache.
-      uint32_t dex_method_idx = MethodHelper(method).FindDexMethodIndexInOtherDexFile(
+      uint32_t dex_method_idx = mh.FindDexMethodIndexInOtherDexFile(
           *target_method->dex_file, target_method->dex_method_index);
       if (dex_method_idx != DexFile::kDexNoIndex) {
         target_method->dex_method_index = dex_method_idx;
@@ -1919,7 +1918,7 @@
     }
   }
   uint64_t duration_ns = NanoTime() - start_ns;
-  if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning())) {
+  if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning()) && !kIsDebugBuild) {
     LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file)
                  << " took " << PrettyDuration(duration_ns);
   }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fad6798..9903421 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -28,6 +28,7 @@
 #include "compiled_method.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "driver/compiler_options.h"
 #include "instruction_set.h"
 #include "invoke_type.h"
 #include "method_reference.h"
@@ -105,8 +106,7 @@
                           InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
-                          CumulativeLogger* timer,
-                          std::string profile_file = "");
+                          CumulativeLogger* timer, std::string profile_file = "");
 
   ~CompilerDriver();
 
@@ -394,6 +394,10 @@
     return dump_passes_;
   }
 
+  bool DidIncludeDebugSymbols() const {
+    return compiler_options_->GetIncludeDebugSymbols();
+  }
+
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 05a9ac7..5d1c5da 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -42,6 +42,7 @@
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
+  static const bool kDefaultIncludeDebugSymbols = kIsDebugBuild;
 
   CompilerOptions() :
     compiler_filter_(kDefaultCompilerFilter),
@@ -51,7 +52,8 @@
     tiny_method_threshold_(kDefaultTinyMethodThreshold),
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
     generate_gdb_information_(false),
-    top_k_profile_threshold_(kDefaultTopKProfileThreshold)
+    top_k_profile_threshold_(kDefaultTopKProfileThreshold),
+    include_debug_symbols_(kDefaultIncludeDebugSymbols)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -64,7 +66,8 @@
                   size_t tiny_method_threshold,
                   size_t num_dex_methods_threshold,
                   bool generate_gdb_information,
-                  double top_k_profile_threshold
+                  double top_k_profile_threshold,
+                  bool include_debug_symbols
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -76,7 +79,8 @@
     tiny_method_threshold_(tiny_method_threshold),
     num_dex_methods_threshold_(num_dex_methods_threshold),
     generate_gdb_information_(generate_gdb_information),
-    top_k_profile_threshold_(top_k_profile_threshold)
+    top_k_profile_threshold_(top_k_profile_threshold),
+    include_debug_symbols_(include_debug_symbols)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -139,6 +143,10 @@
     return top_k_profile_threshold_;
   }
 
+  bool GetIncludeDebugSymbols() const {
+    return include_debug_symbols_;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -157,6 +165,7 @@
   bool generate_gdb_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
+  bool include_debug_symbols_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
index 8c06c9f..0b86ad0 100644
--- a/compiler/elf_stripper.cc
+++ b/compiler/elf_stripper.cc
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "base/logging.h"
+#include "base/stringprintf.h"
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "utils.h"
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 09f2eae..78757ec 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -29,21 +29,16 @@
 
 namespace art {
 
-bool ElfWriterQuick::Create(File* elf_file,
-                            OatWriter* oat_writer,
-                            const std::vector<const DexFile*>& dex_files,
-                            const std::string& android_root,
-                            bool is_host,
-                            const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+static constexpr Elf32_Word NextOffset(const Elf32_Shdr& cur, const Elf32_Shdr& prev) {
+  return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign);
 }
 
-bool ElfWriterQuick::Write(OatWriter* oat_writer,
-                           const std::vector<const DexFile*>& dex_files_unused,
-                           const std::string& android_root_unused,
-                           bool is_host_unused) {
-  const bool debug = false;
+static uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
+  return ((binding) << 4) + ((type) & 0xf);
+}
+
+bool ElfWriterQuick::ElfBuilder::Write() {
+  // The basic layout of the elf file. Order may be different in final output.
   // +-------------------------+
   // | Elf32_Ehdr              |
   // +-------------------------+
@@ -67,12 +62,14 @@
   // | boot.oat\0              |
   // +-------------------------+
   // | .hash                   |
-  // | Elf32_Word nbucket = 1  |
-  // | Elf32_Word nchain  = 3  |
-  // | Elf32_Word bucket[0] = 0|
-  // | Elf32_Word chain[0]  = 1|
-  // | Elf32_Word chain[1]  = 2|
-  // | Elf32_Word chain[2]  = 3|
+  // | Elf32_Word nbucket = b  |
+  // | Elf32_Word nchain  = c  |
+  // | Elf32_Word bucket[0]    |
+  // |         ...             |
+  // | Elf32_Word bucket[b - 1]|
+  // | Elf32_Word chain[0]     |
+  // |         ...             |
+  // | Elf32_Word chain[c - 1] |
   // +-------------------------+
   // | .rodata                 |
   // | oatdata..oatexec-4      |
@@ -88,6 +85,12 @@
   // | Elf32_Dyn DT_STRTAB     |
   // | Elf32_Dyn DT_STRSZ      |
   // | Elf32_Dyn DT_NULL       |
+  // +-------------------------+  (Optional)
+  // | .strtab                 |  (Optional)
+  // | program symbol names    |  (Optional)
+  // +-------------------------+  (Optional)
+  // | .symtab                 |  (Optional)
+  // | program symbols         |  (Optional)
   // +-------------------------+
   // | .shstrtab               |
   // | \0                      |
@@ -98,7 +101,20 @@
   // | .rodata\0               |
   // | .text\0                 |
   // | .shstrtab\0             |
-  // | .debug_frame\0          |
+  // | .symtab\0               |  (Optional)
+  // | .strtab\0               |  (Optional)
+  // | .debug_str\0            |  (Optional)
+  // | .debug_info\0           |  (Optional)
+  // | .debug_frame\0          |  (Optional)
+  // | .debug_abbrev\0         |  (Optional)
+  // +-------------------------+  (Optional)
+  // | .debug_str              |  (Optional)
+  // +-------------------------+  (Optional)
+  // | .debug_info             |  (Optional)
+  // +-------------------------+  (Optional)
+  // | .debug_frame            |  (Optional)
+  // +-------------------------+  (Optional)
+  // | .debug_abbrev           |  (Optional)
   // +-------------------------+
   // | Elf32_Shdr NULL         |
   // | Elf32_Shdr .dynsym      |
@@ -108,20 +124,20 @@
   // | Elf32_Shdr .rodata      |
   // | Elf32_Shdr .dynamic     |
   // | Elf32_Shdr .shstrtab    |
+  // | Elf32_Shdr .debug_str   |  (Optional)
   // | Elf32_Shdr .debug_info  |  (Optional)
-  // | Elf32_Shdr .debug_abbrev|  (Optional)
   // | Elf32_Shdr .debug_frame |  (Optional)
+  // | Elf32_Shdr .debug_abbrev|  (Optional)
   // +-------------------------+
 
-  // phase 1: computing offsets
-  uint32_t expected_offset = 0;
 
-  // Elf32_Ehdr
-  expected_offset += sizeof(Elf32_Ehdr);
+  if (fatal_error_) {
+    return false;
+  }
+  // Step 1. Figure out all the offsets.
 
-  // PHDR
-  uint32_t phdr_alignment = sizeof(Elf32_Word);
-  uint32_t phdr_offset = expected_offset;
+  // The program header table goes immediately after the ELF header.
+  uint32_t phdr_offset = sizeof(Elf32_Ehdr);
   const uint8_t PH_PHDR     = 0;
   const uint8_t PH_LOAD_R__ = 1;
   const uint8_t PH_LOAD_R_X = 2;
@@ -129,41 +145,40 @@
   const uint8_t PH_DYNAMIC  = 4;
   const uint8_t PH_NUM      = 5;
   uint32_t phdr_size = sizeof(Elf32_Phdr) * PH_NUM;
-  expected_offset += phdr_size;
-  if (debug) {
+  if (debug_logging_) {
     LOG(INFO) << "phdr_offset=" << phdr_offset << std::hex << " " << phdr_offset;
     LOG(INFO) << "phdr_size=" << phdr_size << std::hex << " " << phdr_size;
   }
+  Elf32_Phdr program_headers[PH_NUM];
+  memset(&program_headers, 0, sizeof(program_headers));
+  program_headers[PH_PHDR].p_type    = PT_PHDR;
+  program_headers[PH_PHDR].p_offset  = phdr_offset;
+  program_headers[PH_PHDR].p_vaddr   = phdr_offset;
+  program_headers[PH_PHDR].p_paddr   = phdr_offset;
+  program_headers[PH_PHDR].p_filesz  = sizeof(program_headers);
+  program_headers[PH_PHDR].p_memsz   = sizeof(program_headers);
+  program_headers[PH_PHDR].p_flags   = PF_R;
+  program_headers[PH_PHDR].p_align   = sizeof(Elf32_Word);
 
-  // .dynsym
-  uint32_t dynsym_alignment = sizeof(Elf32_Word);
-  uint32_t dynsym_offset = expected_offset = RoundUp(expected_offset, dynsym_alignment);
-  const uint8_t SYM_UNDEF       = 0;  // aka STN_UNDEF
-  const uint8_t SYM_OATDATA     = 1;
-  const uint8_t SYM_OATEXEC     = 2;
-  const uint8_t SYM_OATLASTWORD = 3;
-  const uint8_t SYM_NUM         = 4;
-  uint32_t dynsym_size = sizeof(Elf32_Sym) * SYM_NUM;
-  expected_offset += dynsym_size;
-  if (debug) {
-    LOG(INFO) << "dynsym_offset=" << dynsym_offset << std::hex << " " << dynsym_offset;
-    LOG(INFO) << "dynsym_size=" << dynsym_size << std::hex << " " << dynsym_size;
-  }
+  program_headers[PH_LOAD_R__].p_type    = PT_LOAD;
+  program_headers[PH_LOAD_R__].p_offset  = 0;
+  program_headers[PH_LOAD_R__].p_vaddr   = 0;
+  program_headers[PH_LOAD_R__].p_paddr   = 0;
+  program_headers[PH_LOAD_R__].p_flags   = PF_R;
 
-  // .dynstr
-  uint32_t dynstr_alignment = 1;
-  uint32_t dynstr_offset = expected_offset = RoundUp(expected_offset, dynstr_alignment);
-  std::string dynstr;
-  dynstr += '\0';
-  uint32_t dynstr_oatdata_offset = dynstr.size();
-  dynstr += "oatdata";
-  dynstr += '\0';
-  uint32_t dynstr_oatexec_offset = dynstr.size();
-  dynstr += "oatexec";
-  dynstr += '\0';
-  uint32_t dynstr_oatlastword_offset = dynstr.size();
-  dynstr += "oatlastword";
-  dynstr += '\0';
+  program_headers[PH_LOAD_R_X].p_type    = PT_LOAD;
+  program_headers[PH_LOAD_R_X].p_flags   = PF_R | PF_X;
+
+  program_headers[PH_LOAD_RW_].p_type    = PT_LOAD;
+  program_headers[PH_LOAD_RW_].p_flags   = PF_R | PF_W;
+
+  program_headers[PH_DYNAMIC].p_type    = PT_DYNAMIC;
+  program_headers[PH_DYNAMIC].p_flags   = PF_R | PF_W;
+
+  // Get the dynstr string.
+  std::string dynstr(dynsym_builder_.GenerateStrtab());
+
+  // Add the SONAME to the dynstr.
   uint32_t dynstr_soname_offset = dynstr.size();
   std::string file_name(elf_file_->GetPath());
   size_t directory_separator_pos = file_name.rfind('/');
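
The NextOffset helper added near the top of this file is the entire layout rule used below: each section starts at the end of the previous one, rounded up to its own sh_addralign. A standalone check of that arithmetic (RoundUp reimplemented here for power-of-two alignments; the struct mirrors only the Elf32_Shdr fields involved):

#include <cassert>
#include <cstdint>

struct Shdr { uint32_t sh_offset; uint32_t sh_size; uint32_t sh_addralign; };

// Round x up to a multiple of the power-of-two n.
constexpr uint32_t RoundUp(uint32_t x, uint32_t n) { return (x + n - 1) & ~(n - 1); }

constexpr uint32_t NextOffset(const Shdr& cur, const Shdr& prev) {
  return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign);
}

int main() {
  // A 4-byte-aligned .dynsym at offset 212 holding four 16-byte symbols,
  // followed by a byte-aligned .dynstr: the string table lands at 276.
  Shdr dynsym = {212, 4 * 16, 4};
  Shdr dynstr = {0, 0, 1};
  dynstr.sh_offset = NextOffset(dynstr, dynsym);
  assert(dynstr.sh_offset == 276);
  return 0;
}
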
@@ -172,672 +187,665 @@
   }
   dynstr += file_name;
   dynstr += '\0';
-  uint32_t dynstr_size = dynstr.size();
-  expected_offset += dynstr_size;
-  if (debug) {
-    LOG(INFO) << "dynstr_offset=" << dynstr_offset << std::hex << " " << dynstr_offset;
-    LOG(INFO) << "dynstr_size=" << dynstr_size << std::hex << " " << dynstr_size;
+  if (debug_logging_) {
+    LOG(INFO) << "dynstr size (bytes)   =" << dynstr.size()
+              << std::hex << " " << dynstr.size();
+    LOG(INFO) << "dynsym size (elements)=" << dynsym_builder_.GetSize()
+              << std::hex << " " << dynsym_builder_.GetSize();
   }
 
-  // .hash
-  uint32_t hash_alignment = sizeof(Elf32_Word);  // Even for 64-bit
-  uint32_t hash_offset = expected_offset = RoundUp(expected_offset, hash_alignment);
-  const uint8_t HASH_NBUCKET = 0;
-  const uint8_t HASH_NCHAIN  = 1;
-  const uint8_t HASH_BUCKET0 = 2;
-  const uint8_t HASH_NUM     = HASH_BUCKET0 + 1 + SYM_NUM;
-  uint32_t hash_size = sizeof(Elf32_Word) * HASH_NUM;
-  expected_offset += hash_size;
-  if (debug) {
-    LOG(INFO) << "hash_offset=" << hash_offset << std::hex << " " << hash_offset;
-    LOG(INFO) << "hash_size=" << hash_size << std::hex << " " << hash_size;
+  // Get the strtab.
+  std::string strtab;
+  if (IncludingDebugSymbols()) {
+    strtab = symtab_builder_.GenerateStrtab();
+    if (debug_logging_) {
+      LOG(INFO) << "strtab size (bytes)    =" << strtab.size()
+                << std::hex << " " << strtab.size();
+      LOG(INFO) << "symtab size (elements) =" << symtab_builder_.GetSize()
+                << std::hex << " " << symtab_builder_.GetSize();
+    }
   }
 
-  // .rodata
-  uint32_t oat_data_alignment = kPageSize;
-  uint32_t oat_data_offset = expected_offset = RoundUp(expected_offset, oat_data_alignment);
-  const OatHeader& oat_header = oat_writer->GetOatHeader();
-  CHECK(oat_header.IsValid());
-  uint32_t oat_data_size = oat_header.GetExecutableOffset();
-  expected_offset += oat_data_size;
-  if (debug) {
-    LOG(INFO) << "oat_data_offset=" << oat_data_offset << std::hex << " " << oat_data_offset;
-    LOG(INFO) << "oat_data_size=" << oat_data_size << std::hex << " " << oat_data_size;
-  }
-
-  // .text
-  uint32_t oat_exec_alignment = kPageSize;
-  CHECK_ALIGNED(expected_offset, kPageSize);
-  uint32_t oat_exec_offset = expected_offset = RoundUp(expected_offset, oat_exec_alignment);
-  uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
-  expected_offset += oat_exec_size;
-  CHECK_EQ(oat_data_offset + oat_writer->GetSize(), expected_offset);
-  if (debug) {
-    LOG(INFO) << "oat_exec_offset=" << oat_exec_offset << std::hex << " " << oat_exec_offset;
-    LOG(INFO) << "oat_exec_size=" << oat_exec_size << std::hex << " " << oat_exec_size;
-  }
-
-  // .dynamic
-  // alignment would naturally be sizeof(Elf32_Word), but we want this in a new segment
-  uint32_t dynamic_alignment = kPageSize;
-  uint32_t dynamic_offset = expected_offset = RoundUp(expected_offset, dynamic_alignment);
-  const uint8_t DH_SONAME = 0;
-  const uint8_t DH_HASH   = 1;
-  const uint8_t DH_SYMTAB = 2;
-  const uint8_t DH_SYMENT = 3;
-  const uint8_t DH_STRTAB = 4;
-  const uint8_t DH_STRSZ  = 5;
-  const uint8_t DH_NULL   = 6;
-  const uint8_t DH_NUM    = 7;
-  uint32_t dynamic_size = sizeof(Elf32_Dyn) * DH_NUM;
-  expected_offset += dynamic_size;
-  if (debug) {
-    LOG(INFO) << "dynamic_offset=" << dynamic_offset << std::hex << " " << dynamic_offset;
-    LOG(INFO) << "dynamic_size=" << dynamic_size << std::hex << " " << dynamic_size;
-  }
-
-  // .shstrtab
-  uint32_t shstrtab_alignment = 1;
-  uint32_t shstrtab_offset = expected_offset = RoundUp(expected_offset, shstrtab_alignment);
+  // Get the section header string table.
+  std::vector<Elf32_Shdr*> section_ptrs;
   std::string shstrtab;
   shstrtab += '\0';
-  uint32_t shstrtab_dynamic_offset = shstrtab.size();
-  CHECK_EQ(1U, shstrtab_dynamic_offset);
-  shstrtab += ".dynamic";
-  shstrtab += '\0';
-  uint32_t shstrtab_dynsym_offset = shstrtab.size();
-  shstrtab += ".dynsym";
-  shstrtab += '\0';
-  uint32_t shstrtab_dynstr_offset = shstrtab.size();
-  shstrtab += ".dynstr";
-  shstrtab += '\0';
-  uint32_t shstrtab_hash_offset = shstrtab.size();
-  shstrtab += ".hash";
-  shstrtab += '\0';
-  uint32_t shstrtab_rodata_offset = shstrtab.size();
-  shstrtab += ".rodata";
-  shstrtab += '\0';
-  uint32_t shstrtab_text_offset = shstrtab.size();
-  shstrtab += ".text";
-  shstrtab += '\0';
-  uint32_t shstrtab_shstrtab_offset = shstrtab.size();
-  shstrtab += ".shstrtab";
-  shstrtab += '\0';
-  uint32_t shstrtab_debug_info_offset = shstrtab.size();
-  shstrtab += ".debug_info";
-  shstrtab += '\0';
-  uint32_t shstrtab_debug_abbrev_offset = shstrtab.size();
-  shstrtab += ".debug_abbrev";
-  shstrtab += '\0';
-  uint32_t shstrtab_debug_str_offset = shstrtab.size();
-  shstrtab += ".debug_str";
-  shstrtab += '\0';
-  uint32_t shstrtab_debug_frame_offset = shstrtab.size();
-  shstrtab += ".debug_frame";
-  shstrtab += '\0';
-  uint32_t shstrtab_size = shstrtab.size();
-  expected_offset += shstrtab_size;
-  if (debug) {
-    LOG(INFO) << "shstrtab_offset=" << shstrtab_offset << std::hex << " " << shstrtab_offset;
-    LOG(INFO) << "shstrtab_size=" << shstrtab_size << std::hex << " " << shstrtab_size;
+
+  // Setup sym_undef
+  Elf32_Shdr null_hdr;
+  memset(&null_hdr, 0, sizeof(null_hdr));
+  null_hdr.sh_type = SHT_NULL;
+  null_hdr.sh_link = SHN_UNDEF;
+  section_ptrs.push_back(&null_hdr);
+
+  uint32_t section_index = 1;
+
+  // Setup .dynsym
+  section_ptrs.push_back(&dynsym_builder_.section_);
+  AssignSectionStr(&dynsym_builder_, &shstrtab);
+  dynsym_builder_.section_index_ = section_index++;
+
+  // Setup .dynstr
+  section_ptrs.push_back(&dynsym_builder_.strtab_.section_);
+  AssignSectionStr(&dynsym_builder_.strtab_, &shstrtab);
+  dynsym_builder_.strtab_.section_index_ = section_index++;
+
+  // Setup .hash
+  section_ptrs.push_back(&hash_builder_.section_);
+  AssignSectionStr(&hash_builder_, &shstrtab);
+  hash_builder_.section_index_ = section_index++;
+
+  // Setup .rodata
+  section_ptrs.push_back(&rodata_builder_.section_);
+  AssignSectionStr(&rodata_builder_, &shstrtab);
+  rodata_builder_.section_index_ = section_index++;
+
+  // Setup .text
+  section_ptrs.push_back(&text_builder_.section_);
+  AssignSectionStr(&text_builder_, &shstrtab);
+  text_builder_.section_index_ = section_index++;
+
+  // Setup .dynamic
+  section_ptrs.push_back(&dynamic_builder_.section_);
+  AssignSectionStr(&dynamic_builder_, &shstrtab);
+  dynamic_builder_.section_index_ = section_index++;
+
+  if (IncludingDebugSymbols()) {
+    // Setup .symtab
+    section_ptrs.push_back(&symtab_builder_.section_);
+    AssignSectionStr(&symtab_builder_, &shstrtab);
+    symtab_builder_.section_index_ = section_index++;
+
+    // Setup .strtab
+    section_ptrs.push_back(&symtab_builder_.strtab_.section_);
+    AssignSectionStr(&symtab_builder_.strtab_, &shstrtab);
+    symtab_builder_.strtab_.section_index_ = section_index++;
+  }
+  ElfRawSectionBuilder* it = other_builders_.data();
+  for (uint32_t cnt = 0; cnt < other_builders_.size(); ++it, ++cnt) {
+    // Setup all the other sections.
+    section_ptrs.push_back(&it->section_);
+    AssignSectionStr(it, &shstrtab);
+    it->section_index_ = section_index++;
   }
 
-  // Create debug informatin, if we have it.
-  bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
-  std::vector<uint8_t> dbg_info;
-  std::vector<uint8_t> dbg_abbrev;
-  std::vector<uint8_t> dbg_str;
-  if (generateDebugInformation) {
-    FillInCFIInformation(oat_writer, &dbg_info, &dbg_abbrev, &dbg_str);
+  // Setup shstrtab
+  section_ptrs.push_back(&shstrtab_builder_.section_);
+  AssignSectionStr(&shstrtab_builder_, &shstrtab);
+  shstrtab_builder_.section_index_ = section_index++;
+
+  if (debug_logging_) {
+    LOG(INFO) << ".shstrtab size    (bytes)   =" << shstrtab.size()
+              << std::hex << " " << shstrtab.size();
+    LOG(INFO) << "section list size (elements)=" << section_ptrs.size()
+              << std::hex << " " << section_ptrs.size();
   }
 
-  uint32_t shdbg_info_alignment = 1;
-  uint32_t shdbg_info_offset = expected_offset;
-  uint32_t shdbg_info_size = dbg_info.size();
-  expected_offset += shdbg_info_size;
-  if (debug) {
-    LOG(INFO) << "shdbg_info_offset=" << shdbg_info_offset << std::hex << " " << shdbg_info_offset;
-    LOG(INFO) << "shdbg_info_size=" << shdbg_info_size << std::hex << " " << shdbg_info_size;
+  // Fill in the hash section.
+  std::vector<Elf32_Word> hash = dynsym_builder_.GenerateHashContents();
+
+  if (debug_logging_) {
+    LOG(INFO) << ".hash size (bytes)=" << hash.size() * sizeof(Elf32_Word)
+              << std::hex << " " << hash.size() * sizeof(Elf32_Word);
   }
 
-  uint32_t shdbg_abbrev_alignment = 1;
-  uint32_t shdbg_abbrev_offset = expected_offset;
-  uint32_t shdbg_abbrev_size = dbg_abbrev.size();
-  expected_offset += shdbg_abbrev_size;
-  if (debug) {
-    LOG(INFO) << "shdbg_abbrev_offset=" << shdbg_abbrev_offset << std::hex << " " << shdbg_abbrev_offset;
-    LOG(INFO) << "shdbg_abbrev_size=" << shdbg_abbrev_size << std::hex << " " << shdbg_abbrev_size;
+  Elf32_Word base_offset = sizeof(Elf32_Ehdr) + sizeof(program_headers);
+  std::vector<ElfFilePiece> pieces;
+
+  // Get the layout in the sections.
+  //
+  // Get the layout of the dynsym section.
+  dynsym_builder_.section_.sh_offset = RoundUp(base_offset, dynsym_builder_.section_.sh_addralign);
+  dynsym_builder_.section_.sh_addr = dynsym_builder_.section_.sh_offset;
+  dynsym_builder_.section_.sh_size = dynsym_builder_.GetSize() * sizeof(Elf32_Sym);
+  dynsym_builder_.section_.sh_link = dynsym_builder_.GetLink();
+
+  // Get the layout of the dynstr section.
+  dynsym_builder_.strtab_.section_.sh_offset = NextOffset(dynsym_builder_.strtab_.section_,
+                                                          dynsym_builder_.section_);
+  dynsym_builder_.strtab_.section_.sh_addr = dynsym_builder_.strtab_.section_.sh_offset;
+  dynsym_builder_.strtab_.section_.sh_size = dynstr.size();
+  dynsym_builder_.strtab_.section_.sh_link = dynsym_builder_.strtab_.GetLink();
+
+  // Get the layout of the hash section.
+  hash_builder_.section_.sh_offset = NextOffset(hash_builder_.section_,
+                                                dynsym_builder_.strtab_.section_);
+  hash_builder_.section_.sh_addr = hash_builder_.section_.sh_offset;
+  hash_builder_.section_.sh_size = hash.size() * sizeof(Elf32_Word);
+  hash_builder_.section_.sh_link = hash_builder_.GetLink();
+
+  // Get the layout of the rodata section.
+  rodata_builder_.section_.sh_offset = NextOffset(rodata_builder_.section_,
+                                                  hash_builder_.section_);
+  rodata_builder_.section_.sh_addr = rodata_builder_.section_.sh_offset;
+  rodata_builder_.section_.sh_size = rodata_builder_.size_;
+  rodata_builder_.section_.sh_link = rodata_builder_.GetLink();
+
+  // Get the layout of the text section.
+  text_builder_.section_.sh_offset = NextOffset(text_builder_.section_, rodata_builder_.section_);
+  text_builder_.section_.sh_addr = text_builder_.section_.sh_offset;
+  text_builder_.section_.sh_size = text_builder_.size_;
+  text_builder_.section_.sh_link = text_builder_.GetLink();
+  CHECK_ALIGNED(rodata_builder_.section_.sh_offset + rodata_builder_.section_.sh_size, kPageSize);
+
+  // Get the layout of the dynamic section.
+  dynamic_builder_.section_.sh_offset = NextOffset(dynamic_builder_.section_,
+                                                   text_builder_.section_);
+  dynamic_builder_.section_.sh_addr = dynamic_builder_.section_.sh_offset;
+  dynamic_builder_.section_.sh_size = dynamic_builder_.GetSize() * sizeof(Elf32_Dyn);
+  dynamic_builder_.section_.sh_link = dynamic_builder_.GetLink();
+
+  Elf32_Shdr prev = dynamic_builder_.section_;
+  if (IncludingDebugSymbols()) {
+    // Get the layout of the symtab section.
+    symtab_builder_.section_.sh_offset = NextOffset(symtab_builder_.section_,
+                                                    dynamic_builder_.section_);
+    symtab_builder_.section_.sh_addr = 0;
+    // The size from GetSize() already includes space for the null symbol.
+    symtab_builder_.section_.sh_size = symtab_builder_.GetSize() * sizeof(Elf32_Sym);
+    symtab_builder_.section_.sh_link = symtab_builder_.GetLink();
+
+    // Get the layout of the strtab section.
+    symtab_builder_.strtab_.section_.sh_offset = NextOffset(symtab_builder_.strtab_.section_,
+                                                            symtab_builder_.section_);
+    symtab_builder_.strtab_.section_.sh_addr = 0;
+    symtab_builder_.strtab_.section_.sh_size = strtab.size();
+    symtab_builder_.strtab_.section_.sh_link = symtab_builder_.strtab_.GetLink();
+
+    prev = symtab_builder_.strtab_.section_;
   }
-
-  uint32_t shdbg_frm_alignment = 4;
-  uint32_t shdbg_frm_offset = expected_offset = RoundUp(expected_offset, shdbg_frm_alignment);
-  uint32_t shdbg_frm_size =
-    generateDebugInformation ? compiler_driver_->GetCallFrameInformation()->size() : 0;
-  expected_offset += shdbg_frm_size;
-  if (debug) {
-    LOG(INFO) << "shdbg_frm_offset=" << shdbg_frm_offset << std::hex << " " << shdbg_frm_offset;
-    LOG(INFO) << "shdbg_frm_size=" << shdbg_frm_size << std::hex << " " << shdbg_frm_size;
-  }
-
-  uint32_t shdbg_str_alignment = 1;
-  uint32_t shdbg_str_offset = expected_offset;
-  uint32_t shdbg_str_size = dbg_str.size();
-  expected_offset += shdbg_str_size;
-  if (debug) {
-    LOG(INFO) << "shdbg_str_offset=" << shdbg_str_offset << std::hex << " " << shdbg_str_offset;
-    LOG(INFO) << "shdbg_str_size=" << shdbg_str_size << std::hex << " " << shdbg_str_size;
-  }
-
-  // section headers (after all sections)
-  uint32_t shdr_alignment = sizeof(Elf32_Word);
-  uint32_t shdr_offset = expected_offset = RoundUp(expected_offset, shdr_alignment);
-  const uint8_t SH_NULL     = 0;
-  const uint8_t SH_DYNSYM   = 1;
-  const uint8_t SH_DYNSTR   = 2;
-  const uint8_t SH_HASH     = 3;
-  const uint8_t SH_RODATA   = 4;
-  const uint8_t SH_TEXT     = 5;
-  const uint8_t SH_DYNAMIC  = 6;
-  const uint8_t SH_SHSTRTAB = 7;
-  const uint8_t SH_DBG_INFO = 8;
-  const uint8_t SH_DBG_ABRV = 9;
-  const uint8_t SH_DBG_FRM  = 10;
-  const uint8_t SH_DBG_STR  = 11;
-  const uint8_t SH_NUM      = generateDebugInformation ? 12 : 8;
-  uint32_t shdr_size = sizeof(Elf32_Shdr) * SH_NUM;
-  expected_offset += shdr_size;
-  if (debug) {
-    LOG(INFO) << "shdr_offset=" << shdr_offset << std::hex << " " << shdr_offset;
-    LOG(INFO) << "shdr_size=" << shdr_size << std::hex << " " << shdr_size;
-  }
-
-  // phase 2: initializing data
-
-  // Elf32_Ehdr
-  Elf32_Ehdr elf_header;
-  memset(&elf_header, 0, sizeof(elf_header));
-  elf_header.e_ident[EI_MAG0]       = ELFMAG0;
-  elf_header.e_ident[EI_MAG1]       = ELFMAG1;
-  elf_header.e_ident[EI_MAG2]       = ELFMAG2;
-  elf_header.e_ident[EI_MAG3]       = ELFMAG3;
-  elf_header.e_ident[EI_CLASS]      = ELFCLASS32;
-  elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
-  elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
-  elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
-  elf_header.e_ident[EI_ABIVERSION] = 0;
-  elf_header.e_type = ET_DYN;
-  switch (compiler_driver_->GetInstructionSet()) {
-    case kArm:
-      // Fall through.
-    case kThumb2: {
-      elf_header.e_machine = EM_ARM;
-      elf_header.e_flags = EF_ARM_EABI_VER5;
-      break;
-    }
-    case kArm64: {
-      elf_header.e_machine = EM_AARCH64;
-      elf_header.e_flags = 0;
-      break;
-    }
-    case kX86: {
-      elf_header.e_machine = EM_386;
-      elf_header.e_flags = 0;
-      break;
-    }
-    case kX86_64: {
-      elf_header.e_machine = EM_X86_64;
-      elf_header.e_flags = 0;
-      break;
-    }
-    case kMips: {
-      elf_header.e_machine = EM_MIPS;
-      elf_header.e_flags = (EF_MIPS_NOREORDER |
-                            EF_MIPS_PIC       |
-                            EF_MIPS_CPIC      |
-                            EF_MIPS_ABI_O32   |
-                            EF_MIPS_ARCH_32R2);
-      break;
-    }
-    default: {
-      LOG(FATAL) << "Unknown instruction set: " << compiler_driver_->GetInstructionSet();
-      break;
+  if (debug_logging_) {
+    LOG(INFO) << "dynsym off=" << dynsym_builder_.section_.sh_offset
+              << " dynsym size=" << dynsym_builder_.section_.sh_size;
+    LOG(INFO) << "dynstr off=" << dynsym_builder_.strtab_.section_.sh_offset
+              << " dynstr size=" << dynsym_builder_.strtab_.section_.sh_size;
+    LOG(INFO) << "hash off=" << hash_builder_.section_.sh_offset
+              << " hash size=" << hash_builder_.section_.sh_size;
+    LOG(INFO) << "rodata off=" << rodata_builder_.section_.sh_offset
+              << " rodata size=" << rodata_builder_.section_.sh_size;
+    LOG(INFO) << "text off=" << text_builder_.section_.sh_offset
+              << " text size=" << text_builder_.section_.sh_size;
+    LOG(INFO) << "dynamic off=" << dynamic_builder_.section_.sh_offset
+              << " dynamic size=" << dynamic_builder_.section_.sh_size;
+    if (IncludingDebugSymbols()) {
+      LOG(INFO) << "symtab off=" << symtab_builder_.section_.sh_offset
+                << " symtab size=" << symtab_builder_.section_.sh_size;
+      LOG(INFO) << "strtab off=" << symtab_builder_.strtab_.section_.sh_offset
+                << " strtab size=" << symtab_builder_.strtab_.section_.sh_size;
     }
   }
-  elf_header.e_version = 1;
-  elf_header.e_entry = 0;
-  elf_header.e_phoff = phdr_offset;
-  elf_header.e_shoff = shdr_offset;
-  elf_header.e_ehsize = sizeof(Elf32_Ehdr);
-  elf_header.e_phentsize = sizeof(Elf32_Phdr);
-  elf_header.e_phnum = PH_NUM;
-  elf_header.e_shentsize = sizeof(Elf32_Shdr);
-  elf_header.e_shnum = SH_NUM;
-  elf_header.e_shstrndx = SH_SHSTRTAB;
-
-  // PHDR
-  Elf32_Phdr program_headers[PH_NUM];
-  memset(&program_headers, 0, sizeof(program_headers));
-
-  program_headers[PH_PHDR].p_type    = PT_PHDR;
-  program_headers[PH_PHDR].p_offset  = phdr_offset;
-  program_headers[PH_PHDR].p_vaddr   = phdr_offset;
-  program_headers[PH_PHDR].p_paddr   = phdr_offset;
-  program_headers[PH_PHDR].p_filesz  = sizeof(program_headers);
-  program_headers[PH_PHDR].p_memsz   = sizeof(program_headers);
-  program_headers[PH_PHDR].p_flags   = PF_R;
-  program_headers[PH_PHDR].p_align   = phdr_alignment;
-
-  program_headers[PH_LOAD_R__].p_type    = PT_LOAD;
-  program_headers[PH_LOAD_R__].p_offset  = 0;
-  program_headers[PH_LOAD_R__].p_vaddr   = 0;
-  program_headers[PH_LOAD_R__].p_paddr   = 0;
-  program_headers[PH_LOAD_R__].p_filesz  = oat_data_offset + oat_data_size;
-  program_headers[PH_LOAD_R__].p_memsz   = oat_data_offset + oat_data_size;
-  program_headers[PH_LOAD_R__].p_flags   = PF_R;
-  program_headers[PH_LOAD_R__].p_align   = oat_data_alignment;
-
-  program_headers[PH_LOAD_R_X].p_type    = PT_LOAD;
-  program_headers[PH_LOAD_R_X].p_offset  = oat_exec_offset;
-  program_headers[PH_LOAD_R_X].p_vaddr   = oat_exec_offset;
-  program_headers[PH_LOAD_R_X].p_paddr   = oat_exec_offset;
-  program_headers[PH_LOAD_R_X].p_filesz  = oat_exec_size;
-  program_headers[PH_LOAD_R_X].p_memsz   = oat_exec_size;
-  program_headers[PH_LOAD_R_X].p_flags   = PF_R | PF_X;
-  program_headers[PH_LOAD_R_X].p_align   = oat_exec_alignment;
-
-  // TODO: PF_W for DYNAMIC is considered processor specific, do we need it?
-  program_headers[PH_LOAD_RW_].p_type    = PT_LOAD;
-  program_headers[PH_LOAD_RW_].p_offset  = dynamic_offset;
-  program_headers[PH_LOAD_RW_].p_vaddr   = dynamic_offset;
-  program_headers[PH_LOAD_RW_].p_paddr   = dynamic_offset;
-  program_headers[PH_LOAD_RW_].p_filesz  = dynamic_size;
-  program_headers[PH_LOAD_RW_].p_memsz   = dynamic_size;
-  program_headers[PH_LOAD_RW_].p_flags   = PF_R | PF_W;
-  program_headers[PH_LOAD_RW_].p_align   = dynamic_alignment;
-
-  // TODO: PF_W for DYNAMIC is considered processor specific, do we need it?
-  program_headers[PH_DYNAMIC].p_type    = PT_DYNAMIC;
-  program_headers[PH_DYNAMIC].p_offset  = dynamic_offset;
-  program_headers[PH_DYNAMIC].p_vaddr   = dynamic_offset;
-  program_headers[PH_DYNAMIC].p_paddr   = dynamic_offset;
-  program_headers[PH_DYNAMIC].p_filesz  = dynamic_size;
-  program_headers[PH_DYNAMIC].p_memsz   = dynamic_size;
-  program_headers[PH_DYNAMIC].p_flags   = PF_R | PF_W;
-  program_headers[PH_DYNAMIC].p_align   = dynamic_alignment;
-
-  // .dynsym
-  Elf32_Sym dynsym[SYM_NUM];
-  memset(&dynsym, 0, sizeof(dynsym));
-
-  dynsym[SYM_UNDEF].st_name  = 0;
-  dynsym[SYM_UNDEF].st_value = 0;
-  dynsym[SYM_UNDEF].st_size  = 0;
-  dynsym[SYM_UNDEF].st_info  = 0;
-  dynsym[SYM_UNDEF].st_other = 0;
-  dynsym[SYM_UNDEF].st_shndx = 0;
-
-  dynsym[SYM_OATDATA].st_name  = dynstr_oatdata_offset;
-  dynsym[SYM_OATDATA].st_value = oat_data_offset;
-  dynsym[SYM_OATDATA].st_size  = oat_data_size;
-  SetBindingAndType(&dynsym[SYM_OATDATA], STB_GLOBAL, STT_OBJECT);
-  dynsym[SYM_OATDATA].st_other = STV_DEFAULT;
-  dynsym[SYM_OATDATA].st_shndx = SH_RODATA;
-
-  dynsym[SYM_OATEXEC].st_name  = dynstr_oatexec_offset;
-  dynsym[SYM_OATEXEC].st_value = oat_exec_offset;
-  dynsym[SYM_OATEXEC].st_size  = oat_exec_size;
-  SetBindingAndType(&dynsym[SYM_OATEXEC], STB_GLOBAL, STT_OBJECT);
-  dynsym[SYM_OATEXEC].st_other = STV_DEFAULT;
-  dynsym[SYM_OATEXEC].st_shndx = SH_TEXT;
-
-  dynsym[SYM_OATLASTWORD].st_name  = dynstr_oatlastword_offset;
-  dynsym[SYM_OATLASTWORD].st_value = oat_exec_offset + oat_exec_size - 4;
-  dynsym[SYM_OATLASTWORD].st_size  = 4;
-  SetBindingAndType(&dynsym[SYM_OATLASTWORD], STB_GLOBAL, STT_OBJECT);
-  dynsym[SYM_OATLASTWORD].st_other = STV_DEFAULT;
-  dynsym[SYM_OATLASTWORD].st_shndx = SH_TEXT;
-
-  // .dynstr initialized above as dynstr
-
-  // .hash
-  Elf32_Word hash[HASH_NUM];  // Note this is Elf32_Word even on 64-bit
-  hash[HASH_NBUCKET] = 1;
-  hash[HASH_NCHAIN]  = SYM_NUM;
-  hash[HASH_BUCKET0] = SYM_OATDATA;
-  hash[HASH_BUCKET0 + 1 + SYM_UNDEF]       = SYM_UNDEF;
-  hash[HASH_BUCKET0 + 1 + SYM_OATDATA]     = SYM_OATEXEC;
-  hash[HASH_BUCKET0 + 1 + SYM_OATEXEC]     = SYM_OATLASTWORD;
-  hash[HASH_BUCKET0 + 1 + SYM_OATLASTWORD] = SYM_UNDEF;
-
-  // .rodata and .text content come from oat_contents
-
-  // .dynamic
-  Elf32_Dyn dynamic_headers[DH_NUM];
-  memset(&dynamic_headers, 0, sizeof(dynamic_headers));
-
-  dynamic_headers[DH_SONAME].d_tag = DT_SONAME;
-  dynamic_headers[DH_SONAME].d_un.d_val = dynstr_soname_offset;
-
-  dynamic_headers[DH_HASH].d_tag = DT_HASH;
-  dynamic_headers[DH_HASH].d_un.d_ptr = hash_offset;
-
-  dynamic_headers[DH_SYMTAB].d_tag = DT_SYMTAB;
-  dynamic_headers[DH_SYMTAB].d_un.d_ptr = dynsym_offset;
-
-  dynamic_headers[DH_SYMENT].d_tag = DT_SYMENT;
-  dynamic_headers[DH_SYMENT].d_un.d_val = sizeof(Elf32_Sym);
-
-  dynamic_headers[DH_STRTAB].d_tag = DT_STRTAB;
-  dynamic_headers[DH_STRTAB].d_un.d_ptr = dynstr_offset;
-
-  dynamic_headers[DH_STRSZ].d_tag = DT_STRSZ;
-  dynamic_headers[DH_STRSZ].d_un.d_val = dynstr_size;
-
-  dynamic_headers[DH_NULL].d_tag = DT_NULL;
-  dynamic_headers[DH_NULL].d_un.d_val = 0;
-
-  // .shstrtab initialized above as shstrtab
-
-  // section headers (after all sections)
-  Elf32_Shdr section_headers[SH_NUM];
-  memset(&section_headers, 0, sizeof(section_headers));
-
-  section_headers[SH_NULL].sh_name      = 0;
-  section_headers[SH_NULL].sh_type      = SHT_NULL;
-  section_headers[SH_NULL].sh_flags     = 0;
-  section_headers[SH_NULL].sh_addr      = 0;
-  section_headers[SH_NULL].sh_offset    = 0;
-  section_headers[SH_NULL].sh_size      = 0;
-  section_headers[SH_NULL].sh_link      = 0;
-  section_headers[SH_NULL].sh_info      = 0;
-  section_headers[SH_NULL].sh_addralign = 0;
-  section_headers[SH_NULL].sh_entsize   = 0;
-
-  section_headers[SH_DYNSYM].sh_name      = shstrtab_dynsym_offset;
-  section_headers[SH_DYNSYM].sh_type      = SHT_DYNSYM;
-  section_headers[SH_DYNSYM].sh_flags     = SHF_ALLOC;
-  section_headers[SH_DYNSYM].sh_addr      = dynsym_offset;
-  section_headers[SH_DYNSYM].sh_offset    = dynsym_offset;
-  section_headers[SH_DYNSYM].sh_size      = dynsym_size;
-  section_headers[SH_DYNSYM].sh_link      = SH_DYNSTR;
-  section_headers[SH_DYNSYM].sh_info      = 1;  // 1 because we have not STB_LOCAL symbols
-  section_headers[SH_DYNSYM].sh_addralign = dynsym_alignment;
-  section_headers[SH_DYNSYM].sh_entsize   = sizeof(Elf32_Sym);
-
-  section_headers[SH_DYNSTR].sh_name      = shstrtab_dynstr_offset;
-  section_headers[SH_DYNSTR].sh_type      = SHT_STRTAB;
-  section_headers[SH_DYNSTR].sh_flags     = SHF_ALLOC;
-  section_headers[SH_DYNSTR].sh_addr      = dynstr_offset;
-  section_headers[SH_DYNSTR].sh_offset    = dynstr_offset;
-  section_headers[SH_DYNSTR].sh_size      = dynstr_size;
-  section_headers[SH_DYNSTR].sh_link      = 0;
-  section_headers[SH_DYNSTR].sh_info      = 0;
-  section_headers[SH_DYNSTR].sh_addralign = dynstr_alignment;
-  section_headers[SH_DYNSTR].sh_entsize   = 0;
-
-  section_headers[SH_HASH].sh_name      = shstrtab_hash_offset;
-  section_headers[SH_HASH].sh_type      = SHT_HASH;
-  section_headers[SH_HASH].sh_flags     = SHF_ALLOC;
-  section_headers[SH_HASH].sh_addr      = hash_offset;
-  section_headers[SH_HASH].sh_offset    = hash_offset;
-  section_headers[SH_HASH].sh_size      = hash_size;
-  section_headers[SH_HASH].sh_link      = SH_DYNSYM;
-  section_headers[SH_HASH].sh_info      = 0;
-  section_headers[SH_HASH].sh_addralign = hash_alignment;
-  section_headers[SH_HASH].sh_entsize   = sizeof(Elf32_Word);  // This is Elf32_Word even on 64-bit
-
-  section_headers[SH_RODATA].sh_name      = shstrtab_rodata_offset;
-  section_headers[SH_RODATA].sh_type      = SHT_PROGBITS;
-  section_headers[SH_RODATA].sh_flags     = SHF_ALLOC;
-  section_headers[SH_RODATA].sh_addr      = oat_data_offset;
-  section_headers[SH_RODATA].sh_offset    = oat_data_offset;
-  section_headers[SH_RODATA].sh_size      = oat_data_size;
-  section_headers[SH_RODATA].sh_link      = 0;
-  section_headers[SH_RODATA].sh_info      = 0;
-  section_headers[SH_RODATA].sh_addralign = oat_data_alignment;
-  section_headers[SH_RODATA].sh_entsize   = 0;
-
-  section_headers[SH_TEXT].sh_name      = shstrtab_text_offset;
-  section_headers[SH_TEXT].sh_type      = SHT_PROGBITS;
-  section_headers[SH_TEXT].sh_flags     = SHF_ALLOC | SHF_EXECINSTR;
-  section_headers[SH_TEXT].sh_addr      = oat_exec_offset;
-  section_headers[SH_TEXT].sh_offset    = oat_exec_offset;
-  section_headers[SH_TEXT].sh_size      = oat_exec_size;
-  section_headers[SH_TEXT].sh_link      = 0;
-  section_headers[SH_TEXT].sh_info      = 0;
-  section_headers[SH_TEXT].sh_addralign = oat_exec_alignment;
-  section_headers[SH_TEXT].sh_entsize   = 0;
-
-  // TODO: SHF_WRITE for .dynamic is considered processor specific, do we need it?
-  section_headers[SH_DYNAMIC].sh_name      = shstrtab_dynamic_offset;
-  section_headers[SH_DYNAMIC].sh_type      = SHT_DYNAMIC;
-  section_headers[SH_DYNAMIC].sh_flags     = SHF_WRITE | SHF_ALLOC;
-  section_headers[SH_DYNAMIC].sh_addr      = dynamic_offset;
-  section_headers[SH_DYNAMIC].sh_offset    = dynamic_offset;
-  section_headers[SH_DYNAMIC].sh_size      = dynamic_size;
-  section_headers[SH_DYNAMIC].sh_link      = SH_DYNSTR;
-  section_headers[SH_DYNAMIC].sh_info      = 0;
-  section_headers[SH_DYNAMIC].sh_addralign = dynamic_alignment;
-  section_headers[SH_DYNAMIC].sh_entsize   = sizeof(Elf32_Dyn);
-
-  section_headers[SH_SHSTRTAB].sh_name      = shstrtab_shstrtab_offset;
-  section_headers[SH_SHSTRTAB].sh_type      = SHT_STRTAB;
-  section_headers[SH_SHSTRTAB].sh_flags     = 0;
-  section_headers[SH_SHSTRTAB].sh_addr      = shstrtab_offset;
-  section_headers[SH_SHSTRTAB].sh_offset    = shstrtab_offset;
-  section_headers[SH_SHSTRTAB].sh_size      = shstrtab_size;
-  section_headers[SH_SHSTRTAB].sh_link      = 0;
-  section_headers[SH_SHSTRTAB].sh_info      = 0;
-  section_headers[SH_SHSTRTAB].sh_addralign = shstrtab_alignment;
-  section_headers[SH_SHSTRTAB].sh_entsize   = 0;
-
-  if (generateDebugInformation) {
-    section_headers[SH_DBG_INFO].sh_name      = shstrtab_debug_info_offset;
-    section_headers[SH_DBG_INFO].sh_type      = SHT_PROGBITS;
-    section_headers[SH_DBG_INFO].sh_flags     = 0;
-    section_headers[SH_DBG_INFO].sh_addr      = 0;
-    section_headers[SH_DBG_INFO].sh_offset    = shdbg_info_offset;
-    section_headers[SH_DBG_INFO].sh_size      = shdbg_info_size;
-    section_headers[SH_DBG_INFO].sh_link      = 0;
-    section_headers[SH_DBG_INFO].sh_info      = 0;
-    section_headers[SH_DBG_INFO].sh_addralign = shdbg_info_alignment;
-    section_headers[SH_DBG_INFO].sh_entsize   = 0;
-
-    section_headers[SH_DBG_ABRV].sh_name      = shstrtab_debug_abbrev_offset;
-    section_headers[SH_DBG_ABRV].sh_type      = SHT_PROGBITS;
-    section_headers[SH_DBG_ABRV].sh_flags     = 0;
-    section_headers[SH_DBG_ABRV].sh_addr      = 0;
-    section_headers[SH_DBG_ABRV].sh_offset    = shdbg_abbrev_offset;
-    section_headers[SH_DBG_ABRV].sh_size      = shdbg_abbrev_size;
-    section_headers[SH_DBG_ABRV].sh_link      = 0;
-    section_headers[SH_DBG_ABRV].sh_info      = 0;
-    section_headers[SH_DBG_ABRV].sh_addralign = shdbg_abbrev_alignment;
-    section_headers[SH_DBG_ABRV].sh_entsize   = 0;
-
-    section_headers[SH_DBG_FRM].sh_name      = shstrtab_debug_frame_offset;
-    section_headers[SH_DBG_FRM].sh_type      = SHT_PROGBITS;
-    section_headers[SH_DBG_FRM].sh_flags     = 0;
-    section_headers[SH_DBG_FRM].sh_addr      = 0;
-    section_headers[SH_DBG_FRM].sh_offset    = shdbg_frm_offset;
-    section_headers[SH_DBG_FRM].sh_size      = shdbg_frm_size;
-    section_headers[SH_DBG_FRM].sh_link      = 0;
-    section_headers[SH_DBG_FRM].sh_info      = 0;
-    section_headers[SH_DBG_FRM].sh_addralign = shdbg_frm_alignment;
-    section_headers[SH_DBG_FRM].sh_entsize   = 0;
-
-    section_headers[SH_DBG_STR].sh_name      = shstrtab_debug_str_offset;
-    section_headers[SH_DBG_STR].sh_type      = SHT_PROGBITS;
-    section_headers[SH_DBG_STR].sh_flags     = 0;
-    section_headers[SH_DBG_STR].sh_addr      = 0;
-    section_headers[SH_DBG_STR].sh_offset    = shdbg_str_offset;
-    section_headers[SH_DBG_STR].sh_size      = shdbg_str_size;
-    section_headers[SH_DBG_STR].sh_link      = 0;
-    section_headers[SH_DBG_STR].sh_info      = 0;
-    section_headers[SH_DBG_STR].sh_addralign = shdbg_str_alignment;
-    section_headers[SH_DBG_STR].sh_entsize   = 0;
+  // Get the layout of the extra sections. (This will deal with the debug
+  // sections if they are present.)
+  for (auto it = other_builders_.begin(); it != other_builders_.end(); ++it) {
+    it->section_.sh_offset = NextOffset(it->section_, prev);
+    it->section_.sh_addr = 0;
+    it->section_.sh_size = it->GetBuffer()->size();
+    it->section_.sh_link = it->GetLink();
+    pieces.push_back(ElfFilePiece(it->name_, it->section_.sh_offset,
+                                  it->GetBuffer()->data(), it->GetBuffer()->size()));
+    prev = it->section_;
+    if (debug_logging_) {
+      LOG(INFO) << it->name_ << " off=" << it->section_.sh_offset
+                << " " << it->name_ << " size=" << it->section_.sh_size;
+    }
+  }
+  // Get the layout of the shstrtab section
+  shstrtab_builder_.section_.sh_offset = NextOffset(shstrtab_builder_.section_, prev);
+  shstrtab_builder_.section_.sh_addr = 0;
+  shstrtab_builder_.section_.sh_size = shstrtab.size();
+  shstrtab_builder_.section_.sh_link = shstrtab_builder_.GetLink();
+  if (debug_logging_) {
+      LOG(INFO) << "shstrtab off=" << shstrtab_builder_.section_.sh_offset
+                << " shstrtab size=" << shstrtab_builder_.section_.sh_size;
   }
 
-  // phase 3: writing file
+  // The section header table comes after all the other sections.
+  Elf32_Word sections_offset = RoundUp(
+      shstrtab_builder_.section_.sh_offset + shstrtab_builder_.section_.sh_size,
+      sizeof(Elf32_Word));
 
-  // Elf32_Ehdr
-  if (!elf_file_->WriteFully(&elf_header, sizeof(elf_header))) {
-    PLOG(ERROR) << "Failed to write ELF header for " << elf_file_->GetPath();
+  // Setup the actual symbol arrays.
+  std::vector<Elf32_Sym> dynsym = dynsym_builder_.GenerateSymtab();
+  CHECK_EQ(dynsym.size() * sizeof(Elf32_Sym), dynsym_builder_.section_.sh_size);
+  std::vector<Elf32_Sym> symtab;
+  if (IncludingDebugSymbols()) {
+    symtab = symtab_builder_.GenerateSymtab();
+    CHECK_EQ(symtab.size() * sizeof(Elf32_Sym), symtab_builder_.section_.sh_size);
+  }
+
+  // Setup the dynamic section.
+  // This will add the two values we could not know until now, namely the
+  // size and the soname offset.
+  std::vector<Elf32_Dyn> dynamic = dynamic_builder_.GetDynamics(dynstr.size(),
+                                                                dynstr_soname_offset);
+  CHECK_EQ(dynamic.size() * sizeof(Elf32_Dyn), dynamic_builder_.section_.sh_size);
+
+  // Finish setup of the program headers now that we know the layout of the
+  // whole file.
+  Elf32_Word load_r_size = rodata_builder_.section_.sh_offset + rodata_builder_.section_.sh_size;
+  program_headers[PH_LOAD_R__].p_filesz = load_r_size;
+  program_headers[PH_LOAD_R__].p_memsz  = load_r_size;
+  program_headers[PH_LOAD_R__].p_align  = rodata_builder_.section_.sh_addralign;
+
+  Elf32_Word load_rx_size = text_builder_.section_.sh_size;
+  program_headers[PH_LOAD_R_X].p_offset = text_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_R_X].p_vaddr  = text_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_R_X].p_paddr  = text_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_R_X].p_filesz = load_rx_size;
+  program_headers[PH_LOAD_R_X].p_memsz  = load_rx_size;
+  program_headers[PH_LOAD_R_X].p_align  = text_builder_.section_.sh_addralign;
+
+  program_headers[PH_LOAD_RW_].p_offset = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_RW_].p_vaddr  = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_RW_].p_paddr  = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_LOAD_RW_].p_filesz = dynamic_builder_.section_.sh_size;
+  program_headers[PH_LOAD_RW_].p_memsz  = dynamic_builder_.section_.sh_size;
+  program_headers[PH_LOAD_RW_].p_align  = dynamic_builder_.section_.sh_addralign;
+
+  program_headers[PH_DYNAMIC].p_offset = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_DYNAMIC].p_vaddr  = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_DYNAMIC].p_paddr  = dynamic_builder_.section_.sh_offset;
+  program_headers[PH_DYNAMIC].p_filesz = dynamic_builder_.section_.sh_size;
+  program_headers[PH_DYNAMIC].p_memsz  = dynamic_builder_.section_.sh_size;
+  program_headers[PH_DYNAMIC].p_align  = dynamic_builder_.section_.sh_addralign;
+
+  // Finish setup of the Ehdr values.
+  elf_header_.e_phoff = phdr_offset;
+  elf_header_.e_shoff = sections_offset;
+  elf_header_.e_phnum = PH_NUM;
+  elf_header_.e_shnum = section_ptrs.size();
+  elf_header_.e_shstrndx = shstrtab_builder_.section_index_;
+
+  // Add the rest of the pieces to the list.
+  pieces.push_back(ElfFilePiece("Elf Header", 0, &elf_header_, sizeof(elf_header_)));
+  pieces.push_back(ElfFilePiece("Program headers", phdr_offset,
+                                &program_headers, sizeof(program_headers)));
+  pieces.push_back(ElfFilePiece(".dynamic", dynamic_builder_.section_.sh_offset,
+                                dynamic.data(), dynamic_builder_.section_.sh_size));
+  pieces.push_back(ElfFilePiece(".dynsym", dynsym_builder_.section_.sh_offset,
+                                dynsym.data(), dynsym.size() * sizeof(Elf32_Sym)));
+  pieces.push_back(ElfFilePiece(".dynstr", dynsym_builder_.strtab_.section_.sh_offset,
+                                dynstr.c_str(), dynstr.size()));
+  pieces.push_back(ElfFilePiece(".hash", hash_builder_.section_.sh_offset,
+                                hash.data(), hash.size() * sizeof(Elf32_Word)));
+  pieces.push_back(ElfFilePiece(".rodata", rodata_builder_.section_.sh_offset,
+                                NULL, rodata_builder_.section_.sh_size));
+  pieces.push_back(ElfFilePiece(".text", text_builder_.section_.sh_offset,
+                                NULL, text_builder_.section_.sh_size));
+  if (IncludingDebugSymbols()) {
+    pieces.push_back(ElfFilePiece(".symtab", symtab_builder_.section_.sh_offset,
+                                  symtab.data(), symtab.size() * sizeof(Elf32_Sym)));
+    pieces.push_back(ElfFilePiece(".strtab", symtab_builder_.strtab_.section_.sh_offset,
+                                  strtab.c_str(), strtab.size()));
+  }
+  pieces.push_back(ElfFilePiece(".shstrtab", shstrtab_builder_.section_.sh_offset,
+                                &shstrtab[0], shstrtab.size()));
+  for (uint32_t i = 0; i < section_ptrs.size(); ++i) {
+    // Just add all the section headers individually, since they are
+    // scattered across the heap/stack.
+    Elf32_Word cur_off = sections_offset + i * sizeof(Elf32_Shdr);
+    pieces.push_back(ElfFilePiece("section table piece", cur_off,
+                                  section_ptrs[i], sizeof(Elf32_Shdr)));
+  }
+
+  if (!WriteOutFile(pieces)) {
+    LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath();
     return false;
   }
-
-  // PHDR
-  if (static_cast<off_t>(phdr_offset) != lseek(elf_file_->Fd(), 0, SEEK_CUR)) {
-    PLOG(ERROR) << "Failed to be at expected ELF program header offset phdr_offset "
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(program_headers, sizeof(program_headers))) {
-    PLOG(ERROR) << "Failed to write ELF program headers for " << elf_file_->GetPath();
-    return false;
-  }
-
-  // .dynsym
-  DCHECK_LE(phdr_offset + phdr_size, dynsym_offset);
-  if (static_cast<off_t>(dynsym_offset) != lseek(elf_file_->Fd(), dynsym_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to .dynsym offset location " << dynsym_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(dynsym, sizeof(dynsym))) {
-    PLOG(ERROR) << "Failed to write .dynsym for " << elf_file_->GetPath();
-    return false;
-  }
-
-  // .dynstr
-  DCHECK_LE(dynsym_offset + dynsym_size, dynstr_offset);
-  if (static_cast<off_t>(dynstr_offset) != lseek(elf_file_->Fd(), dynstr_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to .dynstr offset " << dynstr_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(&dynstr[0], dynstr_size)) {
-    PLOG(ERROR) << "Failed to write .dynsym for " << elf_file_->GetPath();
-    return false;
-  }
-
-  // .hash
-  DCHECK_LE(dynstr_offset + dynstr_size, hash_offset);
-  if (static_cast<off_t>(hash_offset) != lseek(elf_file_->Fd(), hash_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to .hash offset " << hash_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(hash, sizeof(hash))) {
-    PLOG(ERROR) << "Failed to write .dynsym for " << elf_file_->GetPath();
-    return false;
-  }
-
-  // .rodata .text
-  DCHECK_LE(hash_offset + hash_size, oat_data_offset);
+  // Write out the actual oat file data.
+  Elf32_Word oat_data_offset = rodata_builder_.section_.sh_offset;
   if (static_cast<off_t>(oat_data_offset) != lseek(elf_file_->Fd(), oat_data_offset, SEEK_SET)) {
     PLOG(ERROR) << "Failed to seek to .rodata offset " << oat_data_offset
                 << " for " << elf_file_->GetPath();
     return false;
   }
-  std::unique_ptr<BufferedOutputStream> output_stream(new BufferedOutputStream(new FileOutputStream(elf_file_)));
-  if (!oat_writer->Write(output_stream.get())) {
+  std::unique_ptr<BufferedOutputStream> output_stream(
+      new BufferedOutputStream(new FileOutputStream(elf_file_)));
+  if (!oat_writer_->Write(output_stream.get())) {
     PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file_->GetPath();
     return false;
   }
 
-  // .dynamic
-  DCHECK_LE(oat_data_offset + oat_writer->GetSize(), dynamic_offset);
-  if (static_cast<off_t>(dynamic_offset) != lseek(elf_file_->Fd(), dynamic_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to .dynamic offset " << dynamic_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(&dynamic_headers[0], dynamic_size)) {
-    PLOG(ERROR) << "Failed to write .dynamic for " << elf_file_->GetPath();
-    return false;
-  }
-
-  // .shstrtab
-  DCHECK_LE(dynamic_offset + dynamic_size, shstrtab_offset);
-  if (static_cast<off_t>(shstrtab_offset) != lseek(elf_file_->Fd(), shstrtab_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to .shstrtab offset " << shstrtab_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(&shstrtab[0], shstrtab_size)) {
-    PLOG(ERROR) << "Failed to write .shstrtab for " << elf_file_->GetPath();
-    return false;
-  }
-
-  if (generateDebugInformation) {
-    // .debug_info
-    DCHECK_LE(shstrtab_offset + shstrtab_size, shdbg_info_offset);
-    if (static_cast<off_t>(shdbg_info_offset) != lseek(elf_file_->Fd(), shdbg_info_offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to .shdbg_info offset " << shdbg_info_offset
-                  << " for " << elf_file_->GetPath();
-      return false;
-    }
-    if (!elf_file_->WriteFully(&dbg_info[0], shdbg_info_size)) {
-      PLOG(ERROR) << "Failed to write .debug_info for " << elf_file_->GetPath();
-      return false;
-    }
-
-    // .debug_abbrev
-    DCHECK_LE(shdbg_info_offset + shdbg_info_size, shdbg_abbrev_offset);
-    if (static_cast<off_t>(shdbg_abbrev_offset) != lseek(elf_file_->Fd(), shdbg_abbrev_offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to .shdbg_abbrev offset " << shdbg_abbrev_offset
-                  << " for " << elf_file_->GetPath();
-      return false;
-    }
-    if (!elf_file_->WriteFully(&dbg_abbrev[0], shdbg_abbrev_size)) {
-      PLOG(ERROR) << "Failed to write .debug_abbrev for " << elf_file_->GetPath();
-      return false;
-    }
-
-    // .debug_frame
-    DCHECK_LE(shdbg_abbrev_offset + shdbg_abbrev_size, shdbg_frm_offset);
-    if (static_cast<off_t>(shdbg_frm_offset) != lseek(elf_file_->Fd(), shdbg_frm_offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to .shdbg_frm offset " << shdbg_frm_offset
-                  << " for " << elf_file_->GetPath();
-      return false;
-    }
-    if (!elf_file_->WriteFully(&((*compiler_driver_->GetCallFrameInformation())[0]), shdbg_frm_size)) {
-      PLOG(ERROR) << "Failed to write .debug_frame for " << elf_file_->GetPath();
-      return false;
-    }
-
-    // .debug_str
-    DCHECK_LE(shdbg_frm_offset + shdbg_frm_size, shdbg_str_offset);
-    if (static_cast<off_t>(shdbg_str_offset) != lseek(elf_file_->Fd(), shdbg_str_offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to .shdbg_str offset " << shdbg_str_offset
-                  << " for " << elf_file_->GetPath();
-      return false;
-    }
-    if (!elf_file_->WriteFully(&dbg_str[0], shdbg_str_size)) {
-      PLOG(ERROR) << "Failed to write .debug_frame for " << elf_file_->GetPath();
-      return false;
-    }
-  }
-
-  // section headers (after all sections)
-  if (generateDebugInformation) {
-    DCHECK_LE(shdbg_str_offset + shdbg_str_size, shdr_offset);
-  } else {
-    DCHECK_LE(shstrtab_offset + shstrtab_size, shdr_offset);
-  }
-  if (static_cast<off_t>(shdr_offset) != lseek(elf_file_->Fd(), shdr_offset, SEEK_SET)) {
-    PLOG(ERROR) << "Failed to seek to ELF section headers offset " << shdr_offset
-                << " for " << elf_file_->GetPath();
-    return false;
-  }
-  if (!elf_file_->WriteFully(section_headers, sizeof(section_headers))) {
-    PLOG(ERROR) << "Failed to write ELF section headers for " << elf_file_->GetPath();
-    return false;
-  }
-
-  VLOG(compiler) << "ELF file written successfully: " << elf_file_->GetPath();
   return true;
-}  // NOLINT(readability/fn_size)
+}
+
+bool ElfWriterQuick::ElfBuilder::WriteOutFile(const std::vector<ElfFilePiece>& pieces) {
+  // TODO It would be nice if this checked for overlap.
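+  // Pieces whose data_ is NULL (e.g. .rodata and .text) are deliberately
+  // skipped below; their contents are written separately by the OatWriter.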
+  for (auto it = pieces.begin(); it != pieces.end(); ++it) {
+    if (it->data_) {
+      if (static_cast<off_t>(it->offset_) != lseek(elf_file_->Fd(), it->offset_, SEEK_SET)) {
+        PLOG(ERROR) << "Failed to seek to " << it->dbg_name_ << " offset location "
+                    << it->offset_ << " for " << elf_file_->GetPath();
+        return false;
+      }
+      if (!elf_file_->WriteFully(it->data_, it->size_)) {
+        PLOG(ERROR) << "Failed to write " << it->dbg_name_ << " for " << elf_file_->GetPath();
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+void ElfWriterQuick::ElfBuilder::SetupDynamic() {
+  dynamic_builder_.AddDynamicTag(DT_HASH, 0, &hash_builder_);
+  dynamic_builder_.AddDynamicTag(DT_STRTAB, 0, &dynsym_builder_.strtab_);
+  dynamic_builder_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_builder_);
+  dynamic_builder_.AddDynamicTag(DT_SYMENT, sizeof(Elf32_Sym));
+}
+
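+// Adds the three mandatory oat symbols. Note that oatlastword covers the
+// final 4 bytes of .text, marking the end of the oat data.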
+void ElfWriterQuick::ElfBuilder::SetupRequiredSymbols() {
+  dynsym_builder_.AddSymbol("oatdata", &rodata_builder_, 0, true,
+                            rodata_builder_.size_, STB_GLOBAL, STT_OBJECT);
+  dynsym_builder_.AddSymbol("oatexec", &text_builder_, 0, true,
+                            text_builder_.size_, STB_GLOBAL, STT_OBJECT);
+  dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.size_ - 4,
+                            true, 4, STB_GLOBAL, STT_OBJECT);
+}
+
+void ElfWriterQuick::ElfDynamicBuilder::AddDynamicTag(Elf32_Sword tag, Elf32_Word d_un) {
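+  // DT_NULL is appended automatically by GetDynamics(), so skip it here.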
+  if (tag == DT_NULL) {
+    return;
+  }
+  dynamics_.push_back({NULL, tag, d_un});
+}
+
+void ElfWriterQuick::ElfDynamicBuilder::AddDynamicTag(Elf32_Sword tag, Elf32_Word d_un,
+                                                      ElfSectionBuilder* section) {
+  if (tag == DT_NULL) {
+    return;
+  }
+  dynamics_.push_back({section, tag, d_un});
+}
+
+std::vector<Elf32_Dyn> ElfWriterQuick::ElfDynamicBuilder::GetDynamics(Elf32_Word strsz,
+                                                                      Elf32_Word soname) {
+  std::vector<Elf32_Dyn> ret;
+  for (auto it = dynamics_.cbegin(); it != dynamics_.cend(); ++it) {
+    if (it->section_) {
+      // We are adding an address relative to a section.
+      ret.push_back(
+          {it->tag_, {it->off_ + it->section_->section_.sh_addr}});
+    } else {
+      ret.push_back({it->tag_, {it->off_}});
+    }
+  }
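+  // Append the deferred entries; with the four tags from SetupDynamic() this
+  // yields seven entries in total, matching GetSize().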
+  ret.push_back({DT_STRSZ, {strsz}});
+  ret.push_back({DT_SONAME, {soname}});
+  ret.push_back({DT_NULL, {0}});
+  return ret;
+}
+
+std::vector<Elf32_Sym> ElfWriterQuick::ElfSymtabBuilder::GenerateSymtab() {
+  std::vector<Elf32_Sym> ret;
+  Elf32_Sym undef_sym;
+  memset(&undef_sym, 0, sizeof(undef_sym));
+  undef_sym.st_shndx = SHN_UNDEF;
+  ret.push_back(undef_sym);
+
+  for (auto it = symbols_.cbegin(); it != symbols_.cend(); ++it) {
+    Elf32_Sym sym;
+    memset(&sym, 0, sizeof(sym));
+    sym.st_name = it->name_idx_;
+    if (it->is_relative_) {
+      sym.st_value = it->addr_ + it->section_->section_.sh_offset;
+    } else {
+      sym.st_value = it->addr_;
+    }
+    sym.st_size = it->size_;
+    sym.st_other = it->other_;
+    sym.st_shndx = it->section_->section_index_;
+    sym.st_info = it->info_;
+
+    ret.push_back(sym);
+  }
+  return ret;
+}
+
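+// Builds the string table as "\0name1\0name2\0...". For illustration, with
+// symbols {"oatdata", "oatexec"} the result is "\0oatdata\0oatexec\0" and
+// name_idx_ becomes 1 and 9 respectively.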
+std::string ElfWriterQuick::ElfSymtabBuilder::GenerateStrtab() {
+  std::string tab;
+  tab += '\0';
+  for (auto it = symbols_.begin(); it != symbols_.end(); ++it) {
+    it->name_idx_ = tab.size();
+    tab += it->name_;
+    tab += '\0';
+  }
+  strtab_.section_.sh_size = tab.size();
+  return tab;
+}
+
+void ElfWriterQuick::ElfBuilder::AssignSectionStr(
+    ElfSectionBuilder* builder, std::string* strtab) {
+  builder->section_.sh_name = strtab->size();
+  *strtab += builder->name_;
+  *strtab += '\0';
+  if (debug_logging_) {
+    LOG(INFO) << "adding section name \"" << builder->name_ << "\" "
+              << "to shstrtab at offset " << builder->section_.sh_name;
+  }
+}
+
+// The standard SysV ELF hash function, taken from bionic.
+static unsigned elfhash(const char *_name) {
+  const unsigned char *name = (const unsigned char *) _name;
+  unsigned h = 0, g;
+
+  while (*name) {
+    h = (h << 4) + *name++;
+    g = h & 0xf0000000;
+    h ^= g;
+    h ^= g >> 24;
+  }
+  return h;
+}
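+// Since h ^= g clears the top nibble on every iteration, the result always
+// fits in 28 bits; e.g. elfhash("") == 0 and a single-character name hashes
+// to its byte value.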
+
+
+std::vector<Elf32_Word> ElfWriterQuick::ElfSymtabBuilder::GenerateHashContents() {
+  // Here is how the ELF hash table works.
+  // There are 3 arrays to worry about.
+  // * The symbol table where the symbol information is.
+  // * The bucket array which is an array of indexes into the symtab and chain.
+  // * The chain array which is also an array of indexes into the symtab and chain.
+  //
+  // Let's say the state is something like this.
+  // +--------+       +--------+      +-----------+
+  // | symtab |       | bucket |      |   chain   |
+  // |  NULL  |       | 1      |      | STN_UNDEF |
+  // | <sym1> |       | 4      |      | 2         |
+  // | <sym2> |       |        |      | 5         |
+  // | <sym3> |       |        |      | STN_UNDEF |
+  // | <sym4> |       |        |      | 3         |
+  // | <sym5> |       |        |      | STN_UNDEF |
+  // +--------+       +--------+      +-----------+
+  //
+  // The lookup process (in Python pseudocode) is:
+  //
+  // def GetSym(name):
+  //     # NB STN_UNDEF == 0
+  //     indx = bucket[elfhash(name) % num_buckets]
+  //     while indx != STN_UNDEF:
+  //         if GetSymbolName(symtab[indx]) == name:
+  //             return symtab[indx]
+  //         indx = chain[indx]
+  //     return SYMBOL_NOT_FOUND
+  //
+  // Between the bucket and chain arrays every symtab index must be present
+  // exactly once (except for STN_UNDEF, which must be present
+  // 1 + num_buckets times).
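+  //
+  // As a small worked example (hypothetical layout): with nbuckets == 2 and
+  // "oatdata" (index 1) and "oatexec" (index 2) falling into the same bucket,
+  // bucket[b] == 1, chain[1] == 2 and chain[2] == STN_UNDEF, so looking up
+  // "oatexec" visits symtab[1] and then symtab[2].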
+
+  // Select number of buckets.
+  // This is essentially arbitrary.
+  Elf32_Word nbuckets;
+  Elf32_Word chain_size = GetSize();
+  if (symbols_.size() < 8) {
+    nbuckets = 2;
+  } else if (symbols_.size() < 32) {
+    nbuckets = 4;
+  } else if (symbols_.size() < 256) {
+    nbuckets = 16;
+  } else {
+    // Have about 32 ids per bucket.
+    nbuckets = RoundUp(symbols_.size()/32, 2);
+  }
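+  // E.g. with just the 3 required oat symbols: nbuckets == 2 and
+  // chain_size == 4 (3 symbols plus the null symbol), so the table occupies
+  // 2 + 2 + 4 = 8 Elf32_Words.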
+  std::vector<Elf32_Word> hash;
+  hash.push_back(nbuckets);
+  hash.push_back(chain_size);
+  uint32_t bucket_offset = hash.size();
+  uint32_t chain_offset = bucket_offset + nbuckets;
+  hash.resize(hash.size() + nbuckets + chain_size, 0);
+
+  Elf32_Word* buckets = hash.data() + bucket_offset;
+  Elf32_Word* chain   = hash.data() + chain_offset;
+
+  // Set up the actual hash table.
+  for (Elf32_Word i = 0; i < symbols_.size(); i++) {
+    // Add 1 since index 0 is reserved for the null symbol, which is not in
+    // the symbols list.
+    Elf32_Word index = i + 1;
+    Elf32_Word hash_val = static_cast<Elf32_Word>(elfhash(symbols_[i].name_.c_str())) % nbuckets;
+    if (buckets[hash_val] == 0) {
+      buckets[hash_val] = index;
+    } else {
+      hash_val = buckets[hash_val];
+      CHECK_LT(hash_val, chain_size);
+      while (chain[hash_val] != 0) {
+        hash_val = chain[hash_val];
+        CHECK_LT(hash_val, chain_size);
+      }
+      chain[hash_val] = index;
+      // Check for loops. This works because a non-empty cell here would mean
+      // another cell already holds this symbol index, i.e. some symbol has
+      // more than one name, which isn't allowed.
+      CHECK_EQ(chain[index], static_cast<Elf32_Word>(0));
+    }
+  }
+
+  return hash;
+}
+
+void ElfWriterQuick::ElfBuilder::SetupEhdr() {
+  memset(&elf_header_, 0, sizeof(elf_header_));
+  elf_header_.e_ident[EI_MAG0]       = ELFMAG0;
+  elf_header_.e_ident[EI_MAG1]       = ELFMAG1;
+  elf_header_.e_ident[EI_MAG2]       = ELFMAG2;
+  elf_header_.e_ident[EI_MAG3]       = ELFMAG3;
+  elf_header_.e_ident[EI_CLASS]      = ELFCLASS32;
+  elf_header_.e_ident[EI_DATA]       = ELFDATA2LSB;
+  elf_header_.e_ident[EI_VERSION]    = EV_CURRENT;
+  elf_header_.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
+  elf_header_.e_ident[EI_ABIVERSION] = 0;
+  elf_header_.e_type = ET_DYN;
+  elf_header_.e_version = 1;
+  elf_header_.e_entry = 0;
+  elf_header_.e_ehsize = sizeof(Elf32_Ehdr);
+  elf_header_.e_phentsize = sizeof(Elf32_Phdr);
+  elf_header_.e_shentsize = sizeof(Elf32_Shdr);
+  elf_header_.e_phoff = sizeof(Elf32_Ehdr);
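+  // Note: e_phoff is set again in Write() once the final layout is known.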
+}
+
+void ElfWriterQuick::ElfBuilder::SetISA(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall through.
+    case kThumb2: {
+      elf_header_.e_machine = EM_ARM;
+      elf_header_.e_flags = EF_ARM_EABI_VER5;
+      break;
+    }
+    case kArm64: {
+      elf_header_.e_machine = EM_AARCH64;
+      elf_header_.e_flags = 0;
+      break;
+    }
+    case kX86: {
+      elf_header_.e_machine = EM_386;
+      elf_header_.e_flags = 0;
+      break;
+    }
+    case kX86_64: {
+      elf_header_.e_machine = EM_X86_64;
+      elf_header_.e_flags = 0;
+      break;
+    }
+    case kMips: {
+      elf_header_.e_machine = EM_MIPS;
+      elf_header_.e_flags = (EF_MIPS_NOREORDER |
+                             EF_MIPS_PIC       |
+                             EF_MIPS_CPIC      |
+                             EF_MIPS_ABI_O32   |
+                             EF_MIPS_ARCH_32R2);
+      break;
+    }
+    default: {
+      fatal_error_ = true;
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      break;
+    }
+  }
+}
+
+void ElfWriterQuick::ElfSymtabBuilder::AddSymbol(
+    const std::string& name, const ElfSectionBuilder* section, Elf32_Addr addr,
+    bool is_relative, Elf32_Word size, uint8_t binding, uint8_t type, uint8_t other) {
+  CHECK(section);
+  ElfSymtabBuilder::ElfSymbolState state {name, section, addr, size, is_relative,
+                                          MakeStInfo(binding, type), other, 0};
+  symbols_.push_back(state);
+}
+
+bool ElfWriterQuick::Create(File* elf_file,
+                            OatWriter* oat_writer,
+                            const std::vector<const DexFile*>& dex_files,
+                            const std::string& android_root,
+                            bool is_host,
+                            const CompilerDriver& driver) {
+  ElfWriterQuick elf_writer(driver, elf_file);
+  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+}
+
+bool ElfWriterQuick::Write(OatWriter* oat_writer,
+                           const std::vector<const DexFile*>& dex_files_unused,
+                           const std::string& android_root_unused,
+                           bool is_host_unused) {
+  const bool debug = false;
+  const bool add_symbols = oat_writer->DidAddSymbols();
+  const OatHeader& oat_header = oat_writer->GetOatHeader();
+  Elf32_Word oat_data_size = oat_header.GetExecutableOffset();
+  uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
+
+  ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0,
+                     oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug);
+
+  if (add_symbols) {
+    AddDebugSymbols(builder, oat_writer, debug);
+  }
+
+  bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
+  if (generateDebugInformation) {
+    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, NULL, 0, 1, 0);
+    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, NULL, 0, 1, 0);
+    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, NULL, 0, 1, 0);
+    ElfRawSectionBuilder debug_frame(".debug_frame",  SHT_PROGBITS, 0, NULL, 0, 4, 0);
+    debug_frame.SetBuffer(*compiler_driver_->GetCallFrameInformation());
+
+    FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
+                         debug_abbrev.GetBuffer(), debug_str.GetBuffer());
+    builder.RegisterRawSection(debug_info);
+    builder.RegisterRawSection(debug_abbrev);
+    builder.RegisterRawSection(debug_frame);
+    builder.RegisterRawSection(debug_str);
+  }
+
+  return builder.Write();
+}
+
+void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) {
+  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
+  ElfSymtabBuilder* symtab = &builder.symtab_builder_;
+  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
+    symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true,
+                      it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+  }
+}
 
 static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) {
   (*buf)[offset+0] = data;
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index dec75dc..dbdccfc 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,7 +17,9 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
+#include "elf_utils.h"
 #include "elf_writer.h"
+#include "instruction_set.h"
 
 namespace art {
 
@@ -45,6 +47,268 @@
     : ElfWriter(driver, elf_file) {}
   ~ElfWriterQuick() {}
 
+  class ElfBuilder;
+  void AddDebugSymbols(ElfBuilder& builder,
+                       OatWriter* oat_writer,
+                       bool debug);
+
+  class ElfSectionBuilder {
+   public:
+    ElfSectionBuilder(const std::string& sec_name, Elf32_Word type, Elf32_Word flags,
+                      const ElfSectionBuilder *link, Elf32_Word info, Elf32_Word align,
+                      Elf32_Word entsize)
+        : name_(sec_name), link_(link) {
+      memset(&section_, 0, sizeof(section_));
+      section_.sh_type = type;
+      section_.sh_flags = flags;
+      section_.sh_info = info;
+      section_.sh_addralign = align;
+      section_.sh_entsize = entsize;
+    }
+
+    virtual ~ElfSectionBuilder() {}
+
+    Elf32_Shdr section_;
+    Elf32_Word section_index_ = 0;
+
+   protected:
+    const std::string name_;
+    const ElfSectionBuilder* link_;
+
+    Elf32_Word GetLink() {
+      return (link_) ? link_->section_index_ : 0;
+    }
+
+   private:
+    friend class ElfBuilder;
+  };
+
+  class ElfDynamicBuilder : public ElfSectionBuilder {
+   public:
+    void AddDynamicTag(Elf32_Sword tag, Elf32_Word d_un);
+    void AddDynamicTag(Elf32_Sword tag, Elf32_Word offset, ElfSectionBuilder* section);
+
+    ElfDynamicBuilder(const std::string& sec_name, ElfSectionBuilder *link)
+        : ElfSectionBuilder(sec_name, SHT_DYNAMIC, SHF_WRITE | SHF_ALLOC, link,
+                            0, kPageSize, sizeof(Elf32_Dyn)) {}
+    ~ElfDynamicBuilder() {}
+
+   protected:
+    struct ElfDynamicState {
+      ElfSectionBuilder* section_;
+      Elf32_Sword tag_;
+      Elf32_Word off_;
+    };
+    std::vector<ElfDynamicState> dynamics_;
+    Elf32_Word GetSize() {
+      // Add 1 for DT_NULL, 1 for DT_STRSZ, and 1 for DT_SONAME. All of
+      // these must be added when we actually put the file together, because
+      // their values depend on the final layout.
+      return dynamics_.size() + 3;
+    }
+
+    // Create the actual dynamic vector. strsz should be the size of the .dynstr
+    // table and soname_off should be the offset of the soname in .dynstr.
+    // Since neither can be known prior to final layout, we wait until here
+    // to add them.
+    std::vector<Elf32_Dyn> GetDynamics(Elf32_Word strsz, Elf32_Word soname_off);
+
+   private:
+    friend class ElfBuilder;
+  };
+
+  class ElfRawSectionBuilder : public ElfSectionBuilder {
+   public:
+    ElfRawSectionBuilder(const std::string& sec_name, Elf32_Word type, Elf32_Word flags,
+                         const ElfSectionBuilder* link, Elf32_Word info, Elf32_Word align,
+                         Elf32_Word entsize)
+        : ElfSectionBuilder(sec_name, type, flags, link, info, align, entsize) {}
+    ~ElfRawSectionBuilder() {}
+    std::vector<uint8_t>* GetBuffer() { return &buf_; }
+    void SetBuffer(std::vector<uint8_t> buf) { buf_ = buf; }
+
+   protected:
+    std::vector<uint8_t> buf_;
+
+   private:
+    friend class ElfBuilder;
+  };
+
+  class ElfOatSectionBuilder : public ElfSectionBuilder {
+   public:
+    ElfOatSectionBuilder(const std::string& sec_name, Elf32_Word size, Elf32_Word offset,
+                         Elf32_Word type, Elf32_Word flags)
+        : ElfSectionBuilder(sec_name, type, flags, NULL, 0, kPageSize, 0),
+          offset_(offset), size_(size) {}
+    ~ElfOatSectionBuilder() {}
+
+   protected:
+    // Offset of the content within the file.
+    Elf32_Word offset_;
+    // Size of the content within the file.
+    Elf32_Word size_;
+
+   private:
+    friend class ElfBuilder;
+  };
+
+  class ElfSymtabBuilder : public ElfSectionBuilder {
+   public:
+    // Add a symbol with the given name to this symtab. When is_relative is
+    // true the symbol refers to 'addr' within the given section; otherwise
+    // 'addr' is taken as an absolute address.
+    void AddSymbol(const std::string& name,
+                   const ElfSectionBuilder* section,
+                   Elf32_Addr addr,
+                   bool is_relative,
+                   Elf32_Word size,
+                   uint8_t binding,
+                   uint8_t type,
+                   uint8_t other = 0);
+
+    ElfSymtabBuilder(const std::string& sec_name, Elf32_Word type,
+                     const std::string& str_name, Elf32_Word str_type, bool alloc)
+        : ElfSectionBuilder(sec_name, type, ((alloc) ? SHF_ALLOC : 0), &strtab_, 0,
+                            sizeof(Elf32_Word), sizeof(Elf32_Sym)),
+          str_name_(str_name), str_type_(str_type),
+          strtab_(str_name, str_type, ((alloc) ? SHF_ALLOC : 0), NULL, 0, 1, 1) {}
+    ~ElfSymtabBuilder() {}
+
+   protected:
+    std::vector<Elf32_Word> GenerateHashContents();
+    std::string GenerateStrtab();
+    std::vector<Elf32_Sym> GenerateSymtab();
+
+    Elf32_Word GetSize() {
+      // 1 is for the implicit NULL symbol.
+      return symbols_.size() + 1;
+    }
+
+    struct ElfSymbolState {
+      const std::string name_;
+      const ElfSectionBuilder* section_;
+      Elf32_Addr addr_;
+      Elf32_Word size_;
+      bool is_relative_;
+      uint8_t info_;
+      uint8_t other_;
+      // Used during Write() to temporarily hold the name index in the strtab.
+      Elf32_Word name_idx_;
+    };
+
+    // Information for the associated string table (.dynstr or .strtab).
+    const std::string str_name_;
+    Elf32_Word str_type_;
+    // The symbols in the same order they will be in the symbol table.
+    std::vector<ElfSymbolState> symbols_;
+    ElfSectionBuilder strtab_;
+
+   private:
+    friend class ElfBuilder;
+  };
+
+  class ElfBuilder FINAL {
+   public:
+    ElfBuilder(OatWriter* oat_writer,
+               File* elf_file,
+               InstructionSet isa,
+               Elf32_Word rodata_relative_offset,
+               Elf32_Word rodata_size,
+               Elf32_Word text_relative_offset,
+               Elf32_Word text_size,
+               const bool add_symbols,
+               bool debug = false)
+        : oat_writer_(oat_writer),
+          elf_file_(elf_file),
+          add_symbols_(add_symbols),
+          debug_logging_(debug),
+          text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS,
+                        SHF_ALLOC | SHF_EXECINSTR),
+          rodata_builder_(".rodata", rodata_size, rodata_relative_offset,
+                          SHT_PROGBITS, SHF_ALLOC),
+          dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true),
+          symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false),
+          hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0,
+                        sizeof(Elf32_Word), sizeof(Elf32_Word)),
+          dynamic_builder_(".dynamic", &dynsym_builder_),
+          shstrtab_builder_(".shstrtab", SHT_STRTAB, 0, NULL, 0, 1, 1) {
+      SetupEhdr();
+      SetupDynamic();
+      SetupRequiredSymbols();
+      SetISA(isa);
+    }
+    ~ElfBuilder() {}
+
+    bool Write();
+
+    // Adds the given raw section to the builder. This will copy it. The caller
+    // is responsible for deallocating their copy.
+    void RegisterRawSection(ElfRawSectionBuilder bld) {
+      other_builders_.push_back(bld);
+    }
+
+   private:
+    OatWriter* oat_writer_;
+    File* elf_file_;
+    const bool add_symbols_;
+    const bool debug_logging_;
+
+    bool fatal_error_ = false;
+
+    Elf32_Ehdr elf_header_;
+
+   public:
+    ElfOatSectionBuilder text_builder_;
+    ElfOatSectionBuilder rodata_builder_;
+    ElfSymtabBuilder dynsym_builder_;
+    ElfSymtabBuilder symtab_builder_;
+    ElfSectionBuilder hash_builder_;
+    ElfDynamicBuilder dynamic_builder_;
+    ElfSectionBuilder shstrtab_builder_;
+    std::vector<ElfRawSectionBuilder> other_builders_;
+
+   private:
+    void SetISA(InstructionSet isa);
+    void SetupEhdr();
+
+    // Sets up the required dynamic section entries, namely all the
+    // mandatory tags that can be added at this point.
+    // Specifically:
+    // DT_HASH
+    // DT_STRTAB
+    // DT_SYMTAB
+    // DT_SYMENT
+    //
+    // Some such as DT_SONAME, DT_STRSZ and DT_NULL will be put in later.
+    void SetupDynamic();
+
+    // Sets up the basic dynamic symbols that are needed, namely all those we
+    // can know already.
+    //
+    // Specifically adds:
+    // oatdata
+    // oatexec
+    // oatlastword
+    void SetupRequiredSymbols();
+    void AssignSectionStr(ElfSectionBuilder *builder, std::string* strtab);
+    struct ElfFilePiece {
+      ElfFilePiece(const std::string& name, Elf32_Word offset, const void* data, Elf32_Word size)
+          : dbg_name_(name), offset_(offset), data_(data), size_(size) {}
+      ~ElfFilePiece() {}
+
+      const std::string& dbg_name_;
+      Elf32_Word offset_;
+      const void *data_;
+      Elf32_Word size_;
+      static bool Compare(ElfFilePiece a, ElfFilePiece b) {
+        return a.offset_ < b.offset_;
+      }
+    };
+
+    // Write each of the pieces out to the file.
+    bool WriteOutFile(const std::vector<ElfFilePiece>& pieces);
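+    // The "> 1" below accounts for the implicit null symbol that GetSize()
+    // always counts.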
+    bool IncludingDebugSymbols() { return add_symbols_ && symtab_builder_.GetSize() > 1; }
+  };
+
   /*
    * @brief Generate the DWARF debug_info and debug_abbrev sections
    * @param oat_writer The Oat file Writer.
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 49cf71b..0b7272c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
   EXPECT_EQ(80U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(78 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5d532ab..c6b9161 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -350,31 +350,14 @@
         uint32_t thumb_offset = compiled_method->CodeDelta();
         quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
 
-        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
-        if (cfi_info != nullptr) {
-          // Copy in the FDE, if present
-          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
-          if (fde != nullptr) {
-            // Copy the information into cfi_info and then fix the address in the new copy.
-            int cur_offset = cfi_info->size();
-            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
-
-            // Set the 'initial_location' field to address the start of the method.
-            uint32_t new_value = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
-            uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = new_value;
-            (*cfi_info)[offset_to_update+1] = new_value >> 8;
-            (*cfi_info)[offset_to_update+2] = new_value >> 16;
-            (*cfi_info)[offset_to_update+3] = new_value >> 24;
-            std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, false);
-            writer_->method_info_.push_back(DebugInfo(name, new_value, new_value + code_size));
-          }
-        }
+        bool force_debug_capture = false;
+        bool deduped = false;
 
         // Deduplicate code arrays.
         auto code_iter = dedupe_map_.find(compiled_method);
         if (code_iter != dedupe_map_.end()) {
           quick_code_offset = code_iter->second;
+          deduped = true;
         } else {
           dedupe_map_.Put(compiled_method, quick_code_offset);
         }
@@ -409,6 +392,41 @@
           writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
           offset_ += code_size;
         }
+
+        uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
+        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
+        if (cfi_info != nullptr) {
+          // Copy in the FDE, if present
+          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
+          if (fde != nullptr) {
+            // Copy the information into cfi_info and then fix the address in the new copy.
+            int cur_offset = cfi_info->size();
+            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+
+            // Set the 'initial_location' field to address the start of the method.
+            uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t);
+            (*cfi_info)[offset_to_update+0] = quick_code_start;
+            (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
+            (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
+            (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
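+            // The four stores above write quick_code_start little-endian,
+            // matching the ELFDATA2LSB encoding of the enclosing ELF file.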
+            force_debug_capture = true;
+          }
+        }
+
+        if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) {
+          // Record debug information for this function if we are doing that or
+          // we have CFI and so need it.
+          std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+          if (deduped) {
+            // TODO We should place the DEDUPED tag on the first instance of a
+            // deduplicated symbol so that it will show up in a debuggerd crash
+            // report.
+            name += " [ DEDUPED ]";
+          }
+          writer_->method_info_.push_back(DebugInfo(name, quick_code_start,
+                                                    quick_code_start + code_size));
+        }
       }
 
       if (kIsDebugBuild) {
@@ -517,7 +535,7 @@
                                                       NullHandle<mirror::ClassLoader>(),
                                                       NullHandle<mirror::ArtMethod>(),
                                                       invoke_type);
-    CHECK(method != NULL);
+    CHECK(method != NULL) << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
     // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking.
     method->SetQuickOatCodeOffset(offsets.code_offset_);
     method->SetOatNativeGcMapOffset(offsets.gc_map_offset_);
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8c20aa8..dbecb95 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -108,6 +108,10 @@
     return method_info_;
   }
 
+  bool DidAddSymbols() const {
+    return compiler_driver_->DidIncludeDebugSymbols();
+  }
+
  private:
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index beafbcc..b8332ad 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -18,6 +18,7 @@
 
 #include "code_generator_arm.h"
 #include "code_generator_x86.h"
+#include "code_generator_x86_64.h"
 #include "dex/verified_method.h"
 #include "driver/dex_compilation_unit.h"
 #include "gc_map_builder.h"
@@ -29,30 +30,60 @@
 
 namespace art {
 
-void CodeGenerator::Compile(CodeAllocator* allocator) {
+void CodeGenerator::CompileBaseline(CodeAllocator* allocator) {
   const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
   DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock());
   DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1)));
+  block_labels_.SetSize(blocks.Size());
+
+  DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+  ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs()
+                   + GetGraph()->GetNumberOfVRegs()
+                   + 1 /* filler */);
   GenerateFrameEntry();
+
   for (size_t i = 0, e = blocks.Size(); i < e; ++i) {
-    CompileBlock(blocks.Get(i));
+    HBasicBlock* block = blocks.Get(i);
+    Bind(GetLabelOf(block));
+    HGraphVisitor* location_builder = GetLocationBuilder();
+    HGraphVisitor* instruction_visitor = GetInstructionVisitor();
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      current->Accept(location_builder);
+      InitLocations(current);
+      current->Accept(instruction_visitor);
+    }
   }
+
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
   MemoryRegion code(buffer, code_size);
   GetAssembler()->FinalizeInstructions(code);
 }
 
-void CodeGenerator::CompileBlock(HBasicBlock* block) {
-  Bind(GetLabelOf(block));
-  HGraphVisitor* location_builder = GetLocationBuilder();
-  HGraphVisitor* instruction_visitor = GetInstructionVisitor();
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    HInstruction* current = it.Current();
-    current->Accept(location_builder);
-    InitLocations(current);
-    current->Accept(instruction_visitor);
+void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
+  // The frame size has already been computed during register allocation.
+  DCHECK_NE(frame_size_, kUninitializedFrameSize);
+  const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
+  DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock());
+  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1)));
+  block_labels_.SetSize(blocks.Size());
+
+  GenerateFrameEntry();
+  for (size_t i = 0, e = blocks.Size(); i < e; ++i) {
+    HBasicBlock* block = blocks.Get(i);
+    Bind(GetLabelOf(block));
+    HGraphVisitor* instruction_visitor = GetInstructionVisitor();
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      current->Accept(instruction_visitor);
+    }
   }
+
+  size_t code_size = GetAssembler()->CodeSize();
+  uint8_t* buffer = allocator->Allocate(code_size);
+  MemoryRegion code(buffer, code_size);
+  GetAssembler()->FinalizeInstructions(code);
 }
 
 size_t CodeGenerator::AllocateFreeRegisterInternal(
@@ -191,7 +222,7 @@
       return new (allocator) x86::CodeGeneratorX86(graph);
     }
     case kX86_64: {
-      return new (allocator) x86::CodeGeneratorX86(graph);
+      return new (allocator) x86_64::CodeGeneratorX86_64(graph);
     }
     default:
       return nullptr;
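
The refactoring above splits code generation into two modes: CompileBaseline
derives the frame size from the graph's vreg counts and builds locations on
the fly, while CompileOptimized requires the register allocator to have set
the frame size beforehand, hence the DCHECK against kUninitializedFrameSize.
A toy model of that contract, using stand-in types rather than ART's classes:

#include <cassert>
#include <cstddef>
#include <cstdio>

struct ToyGraph { size_t max_out_vregs; size_t num_vregs; };

class ToyCodegen {
 public:
  static constexpr size_t kUninitializedFrameSize = 0;
  explicit ToyCodegen(ToyGraph graph) : graph_(graph) {}

  void CompileBaseline() {
    // Baseline computes the frame itself: outs + vregs + one filler slot.
    frame_size_ = RoundUp((graph_.max_out_vregs + graph_.num_vregs + 1) * 4, 16);
    Emit();
  }

  void CompileOptimized() {
    // Optimized relies on register allocation having set the frame size.
    assert(frame_size_ != kUninitializedFrameSize);
    Emit();
  }

  void SetFrameSize(size_t size) { frame_size_ = size; }

 private:
  static size_t RoundUp(size_t x, size_t n) { return (x + n - 1) / n * n; }
  void Emit() { std::printf("emitting with frame size %zu\n", frame_size_); }

  ToyGraph graph_;
  size_t frame_size_ = kUninitializedFrameSize;
};

int main() {
  ToyCodegen baseline(ToyGraph{2, 5});
  baseline.CompileBaseline();    // frame size computed from the vreg counts

  ToyCodegen optimized(ToyGraph{2, 5});
  optimized.SetFrameSize(32);    // stand-in for the register allocator
  optimized.CompileOptimized();
}
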
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e197ccd..83621e0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -28,6 +28,7 @@
 namespace art {
 
 static size_t constexpr kVRegSize = 4;
+static size_t constexpr kUninitializedFrameSize = 0;
 
 class DexCompilationUnit;
 
@@ -51,7 +52,8 @@
  public:
   // Compiles the graph to executable instructions.
-  void Compile(CodeAllocator* allocator);
+  void CompileBaseline(CodeAllocator* allocator);
+  void CompileOptimized(CodeAllocator* allocator);
   static CodeGenerator* Create(ArenaAllocator* allocator,
                                HGraph* graph,
                                InstructionSet instruction_set);
@@ -61,6 +63,13 @@
   Label* GetLabelOf(HBasicBlock* block) const;
   bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
 
+  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
+    // Note that this follows the current calling convention.
+    return GetFrameSize()
+        + kVRegSize  // Art method
+        + parameter->GetIndex() * kVRegSize;
+  }
+
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(Label* label) = 0;
@@ -69,6 +78,7 @@
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) = 0;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -95,14 +105,12 @@
 
  protected:
   CodeGenerator(HGraph* graph, size_t number_of_registers)
-      : frame_size_(0),
+      : frame_size_(kUninitializedFrameSize),
         graph_(graph),
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
-        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {
-    block_labels_.SetSize(graph->GetBlocks().Size());
-  }
-  ~CodeGenerator() { }
+        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {}
+  ~CodeGenerator() {}
 
   // Register allocation logic.
   void AllocateRegistersLocally(HInstruction* instruction) const;
@@ -123,7 +131,6 @@
 
  private:
   void InitLocations(HInstruction* instruction);
-  void CompileBlock(HBasicBlock* block);
 
   HGraph* const graph_;
 
@@ -150,10 +157,10 @@
     return registers_[index];
   }
 
-  uint8_t GetStackOffsetOf(size_t index, size_t word_size) const {
+  uint8_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
-    // Add word_size for the method pointer.
-    return index * kVRegSize + word_size;
+    // Add one for the method pointer.
+    return (index + 1) * kVRegSize;
   }
 
  private:
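
The new GetStackOffsetOf drops the word_size argument: slot 0 of an outgoing
call frame always holds the ArtMethod*, so a stack-passed argument at 'index'
lives at (index + 1) * kVRegSize. GetStackSlotOfParameter then adds the
callee's frame size on top, since incoming parameters sit in the caller's
frame. A small worked check of that arithmetic (the concrete values are
illustrative):

#include <cassert>
#include <cstddef>

constexpr size_t kVRegSize = 4;

// Mirrors InvokeDexCallingConvention::GetStackOffsetOf after this change.
constexpr size_t StackOffsetOf(size_t index) { return (index + 1) * kVRegSize; }

int main() {
  assert(StackOffsetOf(0) == 4);   // first stack argument, just past ArtMethod*
  assert(StackOffsetOf(3) == 16);

  // GetStackSlotOfParameter: frame size + ArtMethod* slot + index * kVRegSize.
  size_t frame_size = 32;          // hypothetical callee frame
  size_t parameter_index = 2;
  assert(frame_size + kVRegSize + parameter_index * kVRegSize == 44);
}
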
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ed3f43c..212a6dc 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -15,14 +15,14 @@
  */
 
 #include "code_generator_arm.h"
-#include "utils/assembler.h"
-#include "utils/arm/assembler_arm.h"
-#include "utils/arm/managed_register_arm.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "thread.h"
+#include "utils/assembler.h"
+#include "utils/arm/assembler_arm.h"
+#include "utils/arm/managed_register_arm.h"
 
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
@@ -48,7 +48,8 @@
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
-      instruction_visitor_(graph, this) {}
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetArena(), this) {}
 
 static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
   return blocked_registers + kNumberOfAllocIds;
@@ -106,6 +107,9 @@
   // Reserve thread register.
   blocked_registers[TR] = true;
 
+  // Reserve temp register.
+  blocked_registers[IP] = true;
+
   // TODO: We currently don't use Quick's callee saved registers.
   blocked_registers[R5] = true;
   blocked_registers[R6] = true;
@@ -129,16 +133,18 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
+void CodeGeneratorARM::ComputeFrameSize(size_t number_of_spill_slots) {
+  SetFrameSize(RoundUp(
+      number_of_spill_slots * kVRegSize
+      + kVRegSize  // Art method
+      + kNumberOfPushedRegistersAtEntry * kArmWordSize,
+      kStackAlignment));
+}
+
 void CodeGeneratorARM::GenerateFrameEntry() {
   core_spill_mask_ |= (1 << LR);
   __ PushList((1 << LR));
 
-  SetFrameSize(RoundUp(
-      (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize
-      + kVRegSize  // filler
-      + kArmWordSize  // Art method
-      + kNumberOfPushedRegistersAtEntry * kArmWordSize,
-      kStackAlignment));
   // The return PC has already been pushed on the stack.
   __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
   __ str(R0, Address(SP, 0));
@@ -159,7 +165,7 @@
   uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kArmWordSize  // ART method
+    return GetFrameSize() + kVRegSize  // ART method
                           + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
@@ -208,7 +214,7 @@
       if (index < calling_convention.GetNumberOfRegisters()) {
         return ArmCoreLocation(calling_convention.GetRegisterAt(index));
       } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -221,7 +227,7 @@
       } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
         return Location::QuickParameter(index);
       } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -252,8 +258,8 @@
     if (source.IsRegister()) {
       __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
     } else {
-      __ ldr(R0, Address(SP, source.GetStackIndex()));
-      __ str(R0, Address(SP, destination.GetStackIndex()));
+      __ ldr(IP, Address(SP, source.GetStackIndex()));
+      __ str(IP, Address(SP, destination.GetStackIndex()));
     }
   }
 }
@@ -272,7 +278,7 @@
       __ Mov(destination.AsArm().AsRegisterPairLow(),
              calling_convention.GetRegisterAt(argument_index));
       __ ldr(destination.AsArm().AsRegisterPairHigh(),
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       if (destination.AsArm().AsRegisterPair() == R1_R2) {
@@ -289,12 +295,12 @@
     if (source.IsRegister()) {
       __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow());
       __ str(source.AsArm().AsRegisterPairHigh(),
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
       __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
-      __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+      __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
@@ -312,14 +318,14 @@
       __ str(calling_convention.GetRegisterAt(argument_index),
              Address(SP, destination.GetStackIndex()));
       __ ldr(R0,
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
       __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ ldr(R0, Address(SP, source.GetStackIndex()));
-      __ str(R0, Address(SP, destination.GetStackIndex()));
-      __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
-      __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+      __ ldr(IP, Address(SP, source.GetStackIndex()));
+      __ str(IP, Address(SP, destination.GetStackIndex()));
+      __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+      __ str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
     }
   }
 }
@@ -330,8 +336,8 @@
     if (location.IsRegister()) {
       __ LoadImmediate(location.AsArm().AsCoreRegister(), value);
     } else {
-      __ LoadImmediate(R0, value);
-      __ str(R0, Address(SP, location.GetStackIndex()));
+      __ LoadImmediate(IP, value);
+      __ str(IP, Address(SP, location.GetStackIndex()));
     }
   } else if (instruction->AsLongConstant() != nullptr) {
     int64_t value = instruction->AsLongConstant()->GetValue();
@@ -339,10 +345,10 @@
       __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value));
       __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value));
     } else {
-      __ LoadImmediate(R0, Low32Bits(value));
-      __ str(R0, Address(SP, location.GetStackIndex()));
-      __ LoadImmediate(R0, High32Bits(value));
-      __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize)));
+      __ LoadImmediate(IP, Low32Bits(value));
+      __ str(IP, Address(SP, location.GetStackIndex()));
+      __ LoadImmediate(IP, High32Bits(value));
+      __ str(IP, Address(SP, location.GetHighStackIndex(kArmWordSize)));
     }
   } else if (instruction->AsLoadLocal() != nullptr) {
     uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
@@ -484,15 +490,21 @@
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) {
@@ -556,7 +568,7 @@
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(ArmCoreLocation(R0));
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
   for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -794,19 +806,102 @@
 }
 
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+  instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";
 }
 
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";
 }
 
 void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+ArmAssembler* ParallelMoveResolverARM::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverARM::EmitMove(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister()) {
+    if (destination.IsRegister()) {
+      __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ StoreToOffset(kStoreWord, source.AsArm().AsCoreRegister(),
+                       SP, destination.GetStackIndex());
+    }
+  } else if (source.IsStackSlot()) {
+    if (destination.IsRegister()) {
+      __ LoadFromOffset(kLoadWord, destination.AsArm().AsCoreRegister(),
+                        SP, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
+      __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+    }
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverARM::Exchange(Register reg, int mem) {
+  __ Mov(IP, reg);
+  __ LoadFromOffset(kLoadWord, reg, SP, mem);
+  __ StoreToOffset(kStoreWord, IP, SP, mem);
+}
+
+void ParallelMoveResolverARM::Exchange(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(this, IP, R0, codegen_->GetNumberOfCoreRegisters());
+  int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+  __ LoadFromOffset(kLoadWord, static_cast<Register>(ensure_scratch.GetRegister()),
+                    SP, mem1 + stack_offset);
+  __ LoadFromOffset(kLoadWord, IP, SP, mem2 + stack_offset);
+  __ StoreToOffset(kStoreWord, static_cast<Register>(ensure_scratch.GetRegister()),
+                   SP, mem2 + stack_offset);
+  __ StoreToOffset(kStoreWord, IP, SP, mem1 + stack_offset);
+}
+
+void ParallelMoveResolverARM::EmitSwap(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    DCHECK_NE(source.AsArm().AsCoreRegister(), IP);
+    DCHECK_NE(destination.AsArm().AsCoreRegister(), IP);
+    __ Mov(IP, source.AsArm().AsCoreRegister());
+    __ Mov(source.AsArm().AsCoreRegister(), destination.AsArm().AsCoreRegister());
+    __ Mov(destination.AsArm().AsCoreRegister(), IP);
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(source.AsArm().AsCoreRegister(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(destination.AsArm().AsCoreRegister(), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    Exchange(source.GetStackIndex(), destination.GetStackIndex());
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverARM::SpillScratch(int reg) {
+  __ Push(static_cast<Register>(reg));
+}
+
+void ParallelMoveResolverARM::RestoreScratch(int reg) {
+  __ Pop(static_cast<Register>(reg));
 }
 
 }  // namespace arm
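
With IP now reserved as a dedicated temp, the resolver can move and swap
values without clobbering an allocatable register such as R0. A toy model of
Exchange(int mem1, int mem2), with locals standing in for IP and the scratch
register and an array standing in for the stack:

#include <cassert>
#include <cstdint>

// Load both slots into temporaries, then store them back crosswise; the
// real code uses IP plus a possibly spilled scratch register and byte
// offsets off SP.
void ExchangeSlots(uint32_t stack[], int slot1, int slot2) {
  uint32_t scratch = stack[slot1];  // LoadFromOffset(kLoadWord, scratch, SP, mem1)
  uint32_t ip = stack[slot2];       // LoadFromOffset(kLoadWord, IP, SP, mem2)
  stack[slot2] = scratch;           // StoreToOffset(kStoreWord, scratch, SP, mem2)
  stack[slot1] = ip;                // StoreToOffset(kStoreWord, IP, SP, mem1)
}

int main() {
  uint32_t stack[4] = {10, 20, 30, 40};
  ExchangeSlots(stack, 0, 3);
  assert(stack[0] == 40 && stack[3] == 10);
}
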
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c945a06..712a24c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -19,6 +19,7 @@
 
 #include "code_generator.h"
 #include "nodes.h"
+#include "parallel_move_resolver.h"
 #include "utils/arm/assembler_arm32.h"
 
 namespace art {
@@ -59,6 +60,27 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };
 
+class ParallelMoveResolverARM : public ParallelMoveResolver {
+ public:
+  ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
+      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+  virtual void EmitMove(size_t index) OVERRIDE;
+  virtual void EmitSwap(size_t index) OVERRIDE;
+  virtual void SpillScratch(int reg) OVERRIDE;
+  virtual void RestoreScratch(int reg) OVERRIDE;
+
+  ArmAssembler* GetAssembler() const;
+
+ private:
+  void Exchange(Register reg, int mem);
+  void Exchange(int mem1, int mem2);
+
+  CodeGeneratorARM* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
+};
+
 class LocationsBuilderARM : public HGraphVisitor {
  public:
   explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -104,6 +126,7 @@
   explicit CodeGeneratorARM(HGraph* graph);
   virtual ~CodeGeneratorARM() { }
 
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
@@ -144,6 +167,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  ParallelMoveResolverARM* GetMoveResolver() {
+    return &move_resolver_;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
@@ -152,6 +179,7 @@
 
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
+  ParallelMoveResolverARM move_resolver_;
   Arm32Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8bfd8d6..f4b12e2 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -48,7 +48,16 @@
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
-      instruction_visitor_(graph, this) {}
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetArena(), this) {}
+
+void CodeGeneratorX86::ComputeFrameSize(size_t number_of_spill_slots) {
+  SetFrameSize(RoundUp(
+      number_of_spill_slots * kVRegSize
+      + kVRegSize  // Art method
+      + kNumberOfPushedRegistersAtEntry * kX86WordSize,
+      kStackAlignment));
+}
 
 static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
   return blocked_registers + kNumberOfAllocIds;
@@ -125,13 +134,6 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
-  SetFrameSize(RoundUp(
-      (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize
-      + kVRegSize  // filler
-      + kX86WordSize  // Art method
-      + kNumberOfPushedRegistersAtEntry * kX86WordSize,
-      kStackAlignment));
-
   // The return PC has already been pushed on the stack.
   __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
@@ -155,7 +157,7 @@
   uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kX86WordSize  // ART method
+    return GetFrameSize() + kVRegSize  // ART method
                           + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
@@ -219,7 +221,7 @@
       if (index < calling_convention.GetNumberOfRegisters()) {
         return X86CpuLocation(calling_convention.GetRegisterAt(index));
       } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -232,7 +234,7 @@
       } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
         return Location::QuickParameter(index);
       } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -264,8 +266,8 @@
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister());
     } else {
       DCHECK(source.IsStackSlot());
-      __ movl(EAX, Address(ESP, source.GetStackIndex()));
-      __ movl(Address(ESP, destination.GetStackIndex()), EAX);
+      __ pushl(Address(ESP, source.GetStackIndex()));
+      __ popl(Address(ESP, destination.GetStackIndex()));
     }
   }
 }
@@ -284,7 +286,7 @@
       __ movl(destination.AsX86().AsRegisterPairLow(),
               calling_convention.GetRegisterAt(argument_index));
       __ movl(destination.AsX86().AsRegisterPairHigh(), Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
+          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(destination.AsX86().AsRegisterPairLow(), Address(ESP, source.GetStackIndex()));
@@ -296,14 +298,14 @@
     uint32_t argument_index = destination.GetQuickParameterIndex();
     if (source.IsRegister()) {
       __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow());
-      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)),
+      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)),
               source.AsX86().AsRegisterPairHigh());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(calling_convention.GetRegisterAt(argument_index),
               Address(ESP, source.GetStackIndex()));
-      __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), EAX);
+      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+      __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
   } else {
     if (source.IsRegister()) {
@@ -315,15 +317,15 @@
       uint32_t argument_index = source.GetQuickParameterIndex();
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(argument_index));
-      __ movl(EAX, Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
-      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX);
+      __ pushl(Address(ESP,
+          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
+      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movl(EAX, Address(ESP, source.GetStackIndex()));
-      __ movl(Address(ESP, destination.GetStackIndex()), EAX);
-      __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX);
+      __ pushl(Address(ESP, source.GetStackIndex()));
+      __ popl(Address(ESP, destination.GetStackIndex()));
+      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
     }
   }
 }
@@ -494,15 +496,21 @@
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
-  constant->SetLocations(nullptr);
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) {
@@ -567,7 +575,7 @@
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(X86CpuLocation(EAX));
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
   for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -796,7 +804,6 @@
 }
 
 void InstructionCodeGeneratorX86::VisitParameterValue(HParameterValue* instruction) {
-  // Nothing to do, the parameter is already at its location.
 }
 
 void LocationsBuilderX86::VisitNot(HNot* instruction) {
@@ -814,19 +821,114 @@
 }
 
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+  instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";
 }
 
 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";
 }
 
 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) {
+  ScratchRegisterScope ensure_scratch(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset));
+  __ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86::EmitMove(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister()) {
+    if (destination.IsRegister()) {
+      __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister());
+    }
+  } else if (source.IsStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsStackSlot());
+      MoveMemoryToMemory(destination.GetStackIndex(),
+                         source.GetStackIndex());
+    }
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+  Register suggested_scratch = reg == EAX ? EBX : EAX;
+  ScratchRegisterScope ensure_scratch(
+      this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
+  __ movl(Address(ESP, mem + stack_offset), reg);
+  __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch1(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+  Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
+  ScratchRegisterScope ensure_scratch2(
+      this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
+  stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
+  __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
+  __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
+  __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
+  __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+}
+
+void ParallelMoveResolverX86::EmitSwap(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    __ xchgl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister());
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(source.AsX86().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(destination.AsX86().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverX86::SpillScratch(int reg) {
+  __ pushl(static_cast<Register>(reg));
+}
+
+void ParallelMoveResolverX86::RestoreScratch(int reg) {
+  __ popl(static_cast<Register>(reg));
 }
 
 }  // namespace x86
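
The x86 resolver replaces the old EAX-clobbering load/store pairs with
pushl/popl, which moves memory to memory without touching an allocatable
register. This is safe for ESP-relative operands because x86 computes the
effective address of a popped memory destination after ESP has been
incremented, so the offsets in the hunks above need no +4 adjustment. A toy
model of those semantics (one array element stands in for a 4-byte word):

#include <cassert>
#include <cstdint>

struct ToyX86 {
  uint32_t mem[16] = {};
  int esp = 8;  // arbitrary starting slot; the stack grows toward 0

  void pushl_mem(int offset) {  // pushl offset(%esp)
    uint32_t value = mem[esp + offset];
    --esp;                      // ESP -= 4 (one slot in this model)
    mem[esp] = value;
  }
  void popl_mem(int offset) {   // popl offset(%esp)
    uint32_t value = mem[esp];
    ++esp;                      // ESP += 4 happens first...
    mem[esp + offset] = value;  // ...then the destination address is formed
  }
};

int main() {
  ToyX86 cpu;
  cpu.mem[8 + 2] = 99;           // source: 2 slots above the initial ESP
  cpu.pushl_mem(2);
  cpu.popl_mem(5);               // destination: 5 slots above the initial ESP
  assert(cpu.mem[8 + 5] == 99);  // moved without using a scratch register
}
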
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 4a70636..acc670e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -19,6 +19,7 @@
 
 #include "code_generator.h"
 #include "nodes.h"
+#include "parallel_move_resolver.h"
 #include "utils/x86/assembler_x86.h"
 
 namespace art {
@@ -59,6 +60,28 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };
 
+class ParallelMoveResolverX86 : public ParallelMoveResolver {
+ public:
+  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
+      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+  virtual void EmitMove(size_t index) OVERRIDE;
+  virtual void EmitSwap(size_t index) OVERRIDE;
+  virtual void SpillScratch(int reg) OVERRIDE;
+  virtual void RestoreScratch(int reg) OVERRIDE;
+
+  X86Assembler* GetAssembler() const;
+
+ private:
+  void Exchange(Register reg, int mem);
+  void Exchange(int mem1, int mem2);
+  void MoveMemoryToMemory(int dst, int src);
+
+  CodeGeneratorX86* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
+};
+
 class LocationsBuilderX86 : public HGraphVisitor {
  public:
   LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
@@ -105,6 +128,7 @@
   explicit CodeGeneratorX86(HGraph* graph);
   virtual ~CodeGeneratorX86() { }
 
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
@@ -145,6 +169,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  ParallelMoveResolverX86* GetMoveResolver() {
+    return &move_resolver_;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
@@ -153,6 +181,7 @@
 
   LocationsBuilderX86 location_builder_;
   InstructionCodeGeneratorX86 instruction_visitor_;
+  ParallelMoveResolverX86 move_resolver_;
   X86Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
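
The Exchange and MoveMemoryToMemory helpers declared above lean on
ParallelMoveResolver's ScratchRegisterScope: when no core register is free,
one is spilled for the duration of the scope, and every ESP-relative offset
must then be biased by one word per spilled scratch, which is what the
stack_offset additions in the .cc hunks account for. A toy model of that
bookkeeping, with stand-in names:

#include <cassert>

constexpr int kWordSize = 4;

// Stand-in for ScratchRegisterScope: "spills" a register when none is
// free, which shifts all stack-relative offsets by one word.
class ToyScratchScope {
 public:
  explicit ToyScratchScope(bool register_available) : spilled_(!register_available) {}
  bool IsSpilled() const { return spilled_; }
 private:
  bool spilled_;
};

int main() {
  // Exchange(int, int) may need two scratches; each spill adds one word.
  ToyScratchScope scratch1(/*register_available=*/false);
  ToyScratchScope scratch2(/*register_available=*/false);
  int stack_offset = scratch1.IsSpilled() ? kWordSize : 0;
  stack_offset += scratch2.IsSpilled() ? kWordSize : 0;
  assert(stack_offset == 8);
}
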
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
new file mode 100644
index 0000000..ebeef9d
--- /dev/null
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -0,0 +1,801 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86_64.h"
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/array.h"
+#include "mirror/art_method.h"
+#include "mirror/object_reference.h"
+#include "thread.h"
+#include "utils/assembler.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/managed_register_x86_64.h"
+
+#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
+
+namespace art {
+
+x86_64::X86_64ManagedRegister Location::AsX86_64() const {
+  return reg().AsX86_64();
+}
+
+namespace x86_64 {
+
+// Some x86_64 instructions require a register to be available as a temporary.
+static constexpr Register TMP = R11;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
+void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
+  stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
+}
+
+void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+  stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
+}
+
+static Location X86_64CpuLocation(Register reg) {
+  return Location::RegisterLocation(X86_64ManagedRegister::FromCpuRegister(reg));
+}
+
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
+      : CodeGenerator(graph, kNumberOfRegIds),
+        location_builder_(graph, this),
+        instruction_visitor_(graph, this),
+        move_resolver_(graph->GetArena(), this) {}
+
+InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
+ManagedRegister CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type,
+                                                          bool* blocked_registers) const {
+  switch (type) {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters);
+      return X86_64ManagedRegister::FromCpuRegister(static_cast<Register>(reg));
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  return ManagedRegister::NoRegister();
+}
+
+void CodeGeneratorX86_64::SetupBlockedRegisters(bool* blocked_registers) const {
+  // Stack register is always reserved.
+  blocked_registers[RSP] = true;
+
+  // Block the register used as TMP.
+  blocked_registers[TMP] = true;
+
+  // TODO: We currently don't use Quick's callee saved registers.
+  blocked_registers[RBX] = true;
+  blocked_registers[RBP] = true;
+  blocked_registers[R12] = true;
+  blocked_registers[R13] = true;
+  blocked_registers[R14] = true;
+  blocked_registers[R15] = true;
+}
+
+void CodeGeneratorX86_64::ComputeFrameSize(size_t number_of_spill_slots) {
+  // Reserve space for the spill slots, a filler word, the current ART method, and the return PC.
+  SetFrameSize(RoundUp(
+      number_of_spill_slots * kVRegSize
+      + kVRegSize  // filler
+      + kVRegSize  // Art method
+      + kNumberOfPushedRegistersAtEntry * kX86_64WordSize,
+      kStackAlignment));
+}
+
+void CodeGeneratorX86_64::GenerateFrameEntry() {
+  // Create a fake register to mimic Quick.
+  static const int kFakeReturnRegister = 16;
+  core_spill_mask_ |= (1 << kFakeReturnRegister);
+
+  // The return PC has already been pushed on the stack.
+  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+}
+
+void CodeGeneratorX86_64::GenerateFrameExit() {
+  __ addq(CpuRegister(RSP),
+          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+}
+
+void CodeGeneratorX86_64::Bind(Label* label) {
+  __ Bind(label);
+}
+
+void InstructionCodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
+  __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+}
+
+int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const {
+  uint16_t reg_number = local->GetRegNumber();
+  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
+  uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
+  if (reg_number >= number_of_vregs - number_of_in_vregs) {
+    // Local is a parameter of the method. It is stored in the caller's frame.
+    return GetFrameSize() + kVRegSize  // ART method
+                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
+  } else {
+    // Local is a temporary in this method. It is stored in this method's frame.
+    return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize)
+                          - kVRegSize
+                          - (number_of_vregs * kVRegSize)
+                          + (reg_number * kVRegSize);
+  }
+}
+
+Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
+  switch (load->GetType()) {
+    case Primitive::kPrimLong:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented type " << load->GetType();
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << load->GetType();
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location();
+}
+
+void CodeGeneratorX86_64::Move(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsStackSlot()) {
+      __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+    }
+  } else if (destination.IsStackSlot()) {
+    if (source.IsRegister()) {
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(source.IsStackSlot());
+      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
+  } else {
+    DCHECK(destination.IsDoubleStackSlot());
+    if (source.IsRegister()) {
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
+  }
+}
+
+void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+  if (instruction->AsIntConstant() != nullptr) {
+    Immediate imm(instruction->AsIntConstant()->GetValue());
+    if (location.IsRegister()) {
+      __ movl(location.AsX86_64().AsCpuRegister(), imm);
+    } else {
+      __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
+    }
+  } else if (instruction->AsLongConstant() != nullptr) {
+    int64_t value = instruction->AsLongConstant()->GetValue();
+    if (location.IsRegister()) {
+      __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value));
+    } else {
+      __ movq(CpuRegister(TMP), Immediate(value));
+      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+    }
+  } else if (instruction->AsLoadLocal() != nullptr) {
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      case Primitive::kPrimLong:
+        Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
+    }
+  } else {
+    // This can currently only happen when the instruction that requests the move
+    // is the next to be compiled.
+    DCHECK_EQ(instruction->GetNext(), move_for);
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+      case Primitive::kPrimLong:
+        Move(location, instruction->GetLocations()->Out());
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+    }
+  }
+}
+
+void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
+  got->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
+  HBasicBlock* successor = got->GetSuccessor();
+  if (GetGraph()->GetExitBlock() == successor) {
+    codegen_->GenerateFrameExit();
+  } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+    __ jmp(codegen_->GetLabelOf(successor));
+  }
+}
+
+void LocationsBuilderX86_64::VisitExit(HExit* exit) {
+  exit->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
+  if (kIsDebugBuild) {
+    __ Comment("Unreachable");
+    __ int3();
+  }
+}
+
+void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if_instr->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
+  // TODO: Generate the input as a condition, instead of materializing in a register.
+  __ cmpl(if_instr->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), Immediate(0));
+  __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
+    __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  }
+}
+
+void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
+  local->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
+  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+}
+
+void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
+  local->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load) {
+  // Nothing to do; this is driven by the code generator.
+}
+
+void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  switch (store->InputAt(1)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+  }
+  store->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) {
+}
+
+void LocationsBuilderX86_64::VisitEqual(HEqual* equal) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  equal->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* equal) {
+  __ cmpq(equal->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(),
+          equal->GetLocations()->InAt(1).AsX86_64().AsCpuRegister());
+  __ setcc(kEqual, equal->GetLocations()->Out().AsX86_64().AsCpuRegister());
+}
+
+void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
+}
+
+void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());
+  constant->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
+  ret->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
+  codegen_->GenerateFrameExit();
+  __ ret();
+}
+
+void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
+  switch (ret->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, X86_64CpuLocation(RAX));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+  }
+  ret->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
+  if (kIsDebugBuild) {
+    switch (ret->InputAt(0)->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+      case Primitive::kPrimLong:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX);
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+    }
+  }
+  codegen_->GenerateFrameExit();
+  __ ret();
+}
+
+static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+
+class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeRuntimeCallingConvention()
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      uint32_t index = gp_index_++;
+      stack_index_++;
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        return X86_64CpuLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
+      }
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t index = gp_index_;
+      stack_index_ += 2;
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        gp_index_ += 1;
+        return X86_64CpuLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        gp_index_ += 2;
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
+      }
+    }
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented parameter type " << type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+  return Location();
+}
+
+void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
+  locations->AddTemp(X86_64CpuLocation(RDI));
+
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  for (size_t i = 0; i < invoke->InputCount(); ++i) {
+    HInstruction* input = invoke->InputAt(i);
+    locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
+  }
+
+  switch (invoke->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      locations->SetOut(X86_64CpuLocation(RAX));
+      break;
+
+    case Primitive::kPrimVoid:
+      break;
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      break;
+  }
+
+  invoke->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister();
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).SizeValue() +
+      invoke->GetIndexInDexCache() * heap_reference_size;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, index_in_cache));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+
+  codegen_->RecordPcInfo(invoke->GetDexPc());
+}
+
+void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+  }
+  add->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
+  LocationSummary* locations = add->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt: {
+      __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitSub(HSub* sub) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
+  switch (sub->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+  }
+  sub->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
+  LocationSummary* locations = sub->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+  switch (sub->GetResultType()) {
+    case Primitive::kPrimInt: {
+      __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetOut(X86_64CpuLocation(RAX));
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
+  InvokeRuntimeCallingConvention calling_convention;
+  LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
+  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+
+  __ gs()->call(Address::Absolute(
+      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true));
+
+  codegen_->RecordPcInfo(instruction->GetDexPc());
+}
+
+void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
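+  // The visitor computes offsets relative to the caller's frame; shift stack-passed
+  // parameters by this method's frame size so they are addressed off the current SP.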
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitParameterValue(HParameterValue* instruction) {
+  // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderX86_64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
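+  // A boolean is materialized as 0 or 1, so a logical not is an xor with 1.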
+  __ xorq(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1));
+}
+
+void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverX86_64::EmitMove(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
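+  // Single stack slots hold 32-bit values, hence the movl for memory operands;
+  // register-to-register copies conservatively move all 64 bits.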
+  if (source.IsRegister()) {
+    if (destination.IsRegister()) {
+      __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    }
+  } else if (source.IsStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movl(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) {
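+  // Swap through TMP rather than using xchg with a memory operand, which
+  // asserts an implicit LOCK prefix on x86.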
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg));
+  __ movl(CpuRegister(reg), CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
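+  // If the scratch register had to be spilled, a pushq above moved RSP down one
+  // word, so the incoming stack offsets must be rebased.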
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movl(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverX86_64::SpillScratch(int reg) {
+  __ pushq(CpuRegister(reg));
+}
+
+void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
+  __ popq(CpuRegister(reg));
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
new file mode 100644
index 0000000..f07df29
--- /dev/null
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+
+#include "code_generator.h"
+#include "nodes.h"
+#include "parallel_move_resolver.h"
+#include "utils/x86_64/assembler_x86_64.h"
+
+namespace art {
+namespace x86_64 {
+
+static constexpr size_t kX86_64WordSize = 8;
+
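+// RDI is deliberately absent: the managed calling convention reserves it for the
+// current ArtMethod* (see VisitInvokeStatic below).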
+static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
+
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeDexCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  uint32_t gp_index_;
+  uint32_t stack_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
+class CodeGeneratorX86_64;
+
+class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
+ public:
+  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
+      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+  virtual void EmitMove(size_t index) OVERRIDE;
+  virtual void EmitSwap(size_t index) OVERRIDE;
+  virtual void SpillScratch(int reg) OVERRIDE;
+  virtual void RestoreScratch(int reg) OVERRIDE;
+
+  X86_64Assembler* GetAssembler() const;
+
+ private:
+  void Exchange(CpuRegister reg, int mem);
+  void Exchange(int mem1, int mem2);
+
+  CodeGeneratorX86_64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
+};
+
+class LocationsBuilderX86_64 : public HGraphVisitor {
+ public:
+  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name)     \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  CodeGeneratorX86_64* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
+};
+
+class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+ public:
+  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name)     \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void LoadCurrentMethod(CpuRegister reg);
+
+  X86_64Assembler* GetAssembler() const { return assembler_; }
+
+ private:
+  X86_64Assembler* const assembler_;
+  CodeGeneratorX86_64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
+};
+
+class CodeGeneratorX86_64 : public CodeGenerator {
+ public:
+  explicit CodeGeneratorX86_64(HGraph* graph);
+  virtual ~CodeGeneratorX86_64() {}
+
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
+  virtual void GenerateFrameEntry() OVERRIDE;
+  virtual void GenerateFrameExit() OVERRIDE;
+  virtual void Bind(Label* label) OVERRIDE;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+
+  virtual size_t GetWordSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
+  virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
+    return &location_builder_;
+  }
+
+  virtual HGraphVisitor* GetInstructionVisitor() OVERRIDE {
+    return &instruction_visitor_;
+  }
+
+  virtual X86_64Assembler* GetAssembler() OVERRIDE {
+    return &assembler_;
+  }
+
+  ParallelMoveResolverX86_64* GetMoveResolver() {
+    return &move_resolver_;
+  }
+
+  int32_t GetStackSlot(HLocal* local) const;
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  virtual size_t GetNumberOfRegisters() const OVERRIDE {
+    return kNumberOfRegIds;
+  }
+
+  virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
+    return kNumberOfCpuRegisters;
+  }
+
+  virtual size_t GetNumberOfFloatingPointRegisters() const OVERRIDE {
+    return kNumberOfFloatRegisters;
+  }
+
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE;
+  virtual ManagedRegister AllocateFreeRegister(
+      Primitive::Type type, bool* blocked_registers) const OVERRIDE;
+  virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+ private:
+  // Helper method to move a value between two locations.
+  void Move(Location destination, Location source);
+
+  LocationsBuilderX86_64 location_builder_;
+  InstructionCodeGeneratorX86_64 instruction_visitor_;
+  ParallelMoveResolverX86_64 move_resolver_;
+  X86_64Assembler assembler_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7684bb1..c3baf1a 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -47,6 +47,17 @@
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+  typedef int32_t (*fptr)();
+  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
+  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
+  if (has_result) {
+    CHECK_EQ(result, expected);
+  }
+}
+#endif
+
 static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -55,24 +66,23 @@
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
   InternalCodeAllocator allocator;
+
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
-  codegen->Compile(&allocator);
-  typedef int32_t (*fptr)();
+  codegen->CompileBaseline(&allocator);
 #if defined(__i386__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
 #endif
+
   codegen = CodeGenerator::Create(&arena, graph, kArm);
-  codegen->Compile(&allocator);
+  codegen->CompileBaseline(&allocator);
 #if defined(__arm__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
+#endif
+
+  codegen = CodeGenerator::Create(&arena, graph, kX86_64);
+  codegen->CompileBaseline(&allocator);
+#if defined(__x86_64__)
+  Run(allocator, has_result, expected);
 #endif
 }
 
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
new file mode 100644
index 0000000..371478c
--- /dev/null
+++ b/compiler/optimizing/graph_test.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/stringprintf.h"
+#include "builder.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "pretty_printer.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
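+// The helpers below build minimal one-instruction blocks. The HIntConstant fed
+// to the HIf is arbitrary: these tests only exercise CFG shape, not dataflow.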
+static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) {
+  HBasicBlock* if_block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(if_block);
+  HInstruction* instr = new (allocator) HIntConstant(4);
+  if_block->AddInstruction(instr);
+  instr = new (allocator) HIf(instr);
+  if_block->AddInstruction(instr);
+  return if_block;
+}
+
+static HBasicBlock* createGotoBlock(HGraph* graph, ArenaAllocator* allocator) {
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  HInstruction* got = new (allocator) HGoto();
+  block->AddInstruction(got);
+  return block;
+}
+
+static HBasicBlock* createReturnBlock(HGraph* graph, ArenaAllocator* allocator) {
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  HInstruction* return_instr = new (allocator) HReturnVoid();
+  block->AddInstruction(return_instr);
+  return block;
+}
+
+static HBasicBlock* createExitBlock(HGraph* graph, ArenaAllocator* allocator) {
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  HInstruction* exit_instr = new (allocator) HExit();
+  block->AddInstruction(exit_instr);
+  return block;
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the false block to be the return block.
+TEST(GraphTest, IfSuccessorSimpleJoinBlock1) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_true = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(if_true);
+  if_true->AddSuccessor(return_block);
+  if_block->AddSuccessor(return_block);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  graph->SimplifyCFG();
+
+  // Ensure we still have the same if true block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true);
+
+  // Ensure the critical edge has been removed.
+  HBasicBlock* false_block = if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor();
+  ASSERT_NE(false_block, return_block);
+
+  // Ensure the new block branches to the join block.
+  ASSERT_EQ(false_block->GetSuccessors().Get(0), return_block);
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the true block to be the return block.
+TEST(GraphTest, IfSuccessorSimpleJoinBlock2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_false = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  if_false->AddSuccessor(return_block);
+  if_block->AddSuccessor(if_false);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false);
+
+  graph->SimplifyCFG();
+
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false);
+
+  // Ensure the critical edge has been removed.
+  HBasicBlock* true_block = if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor();
+  ASSERT_NE(true_block, return_block);
+
+  // Ensure the new block branches to the join block.
+  ASSERT_EQ(true_block->GetSuccessors().Get(0), return_block);
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the true block to be the loop header.
+TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  graph->BuildDominatorTree();
+
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  // Ensure there is only one back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+
+  // Ensure the new block is the back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+            if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the false block to be the loop header.
+TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  if_block->AddSuccessor(if_block);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_block);
+
+  graph->BuildDominatorTree();
+
+  // Ensure we still have the same if true block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+
+  // Ensure there is only one back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+
+  // Ensure the new block is the back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+            if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the true block to be a loop header with multiple pre headers.
+TEST(GraphTest, IfSuccessorMultiplePreHeaders1) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* loop_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(first_if_block);
+  first_if_block->AddSuccessor(if_block);
+  first_if_block->AddSuccessor(loop_block);
+  loop_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(return_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), loop_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  graph->BuildDominatorTree();
+
+  HIf* if_instr = if_block->GetLastInstruction()->AsIf();
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_instr->IfFalseSuccessor(), return_block);
+
+  // Ensure there is only one pre header (the other predecessor is the back edge).
+  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+
+  // Ensure the new block is the successor of the true block.
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Size(), 1u);
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Get(0),
+            loop_block->GetLoopInformation()->GetPreHeader());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the false block to be a loop header with multiple pre headers.
+TEST(GraphTest, IfSuccessorMultiplePreHeaders2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* loop_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(first_if_block);
+  first_if_block->AddSuccessor(if_block);
+  first_if_block->AddSuccessor(loop_block);
+  loop_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(return_block);
+  if_block->AddSuccessor(loop_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), loop_block);
+
+  graph->BuildDominatorTree();
+
+  HIf* if_instr = if_block->GetLastInstruction()->AsIf();
+  // Ensure we still have the same if true block.
+  ASSERT_EQ(if_instr->IfTrueSuccessor(), return_block);
+
+  // Ensure there is only one pre header (the other predecessor is the back edge).
+  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+
+  // Ensure the new block is the successor of the false block.
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Size(), 1u);
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Get(0),
+            loop_block->GetLoopInformation()->GetPreHeader());
+}
+
+TEST(GraphTest, InsertInstructionBefore) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* block = createGotoBlock(graph, &allocator);
+  HInstruction* got = block->GetLastInstruction();
+  ASSERT_TRUE(got->IsControlFlow());
+
+  // Test at the beginning of the block.
+  HInstruction* first_instruction = new (&allocator) HIntConstant(4);
+  block->InsertInstructionBefore(first_instruction, got);
+
+  ASSERT_NE(first_instruction->GetId(), -1);
+  ASSERT_EQ(first_instruction->GetBlock(), block);
+  ASSERT_EQ(block->GetFirstInstruction(), first_instruction);
+  ASSERT_EQ(block->GetLastInstruction(), got);
+  ASSERT_EQ(first_instruction->GetNext(), got);
+  ASSERT_EQ(first_instruction->GetPrevious(), nullptr);
+  ASSERT_EQ(got->GetNext(), nullptr);
+  ASSERT_EQ(got->GetPrevious(), first_instruction);
+
+  // Test in the middle of the block.
+  HInstruction* second_instruction = new (&allocator) HIntConstant(4);
+  block->InsertInstructionBefore(second_instruction, got);
+
+  ASSERT_NE(second_instruction->GetId(), -1);
+  ASSERT_EQ(second_instruction->GetBlock(), block);
+  ASSERT_EQ(block->GetFirstInstruction(), first_instruction);
+  ASSERT_EQ(block->GetLastInstruction(), got);
+  ASSERT_EQ(first_instruction->GetNext(), second_instruction);
+  ASSERT_EQ(first_instruction->GetPrevious(), nullptr);
+  ASSERT_EQ(second_instruction->GetNext(), got);
+  ASSERT_EQ(second_instruction->GetPrevious(), first_instruction);
+  ASSERT_EQ(got->GetNext(), nullptr);
+  ASSERT_EQ(got->GetPrevious(), second_instruction);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5c5042e..a49ce64 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -28,8 +28,15 @@
  */
 class HGraphVisualizerPrinter : public HGraphVisitor {
  public:
-  HGraphVisualizerPrinter(HGraph* graph, std::ostream& output, const CodeGenerator& codegen)
-      : HGraphVisitor(graph), output_(output), codegen_(codegen), indent_(0) {}
+  HGraphVisualizerPrinter(HGraph* graph,
+                          std::ostream& output,
+                          const char* pass_name,
+                          const CodeGenerator& codegen)
+      : HGraphVisitor(graph),
+        output_(output),
+        pass_name_(pass_name),
+        codegen_(codegen),
+        indent_(0) {}
 
   void StartTag(const char* name) {
     AddIndent();
@@ -94,6 +101,33 @@
     output_<< std::endl;
   }
 
+  void DumpLocation(Location location, Primitive::Type type) {
+    if (location.IsRegister()) {
+      if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) {
+        codegen_.DumpFloatingPointRegister(output_, location.reg().RegId());
+      } else {
+        codegen_.DumpCoreRegister(output_, location.reg().RegId());
+      }
+    } else {
+      DCHECK(location.IsStackSlot());
+      output_ << location.GetStackIndex() << "(sp)";
+    }
+  }
+
+  void VisitParallelMove(HParallelMove* instruction) {
+    output_ << instruction->DebugName();
+    output_ << " (";
+    for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
+      MoveOperands* move = instruction->MoveOperandsAt(i);
+      DumpLocation(move->GetSource(), Primitive::kPrimInt);
+      output_ << " -> ";
+      DumpLocation(move->GetDestination(), Primitive::kPrimInt);
+      if (i + 1 != e) {
+        output_ << ", ";
+      }
+    }
+    output_ << ")";
+  }
 
   void VisitInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
@@ -104,24 +138,28 @@
       }
       output_ << "]";
     }
-    if (instruction->GetLifetimePosition() != kNoLifetime) {
+    if (pass_name_ == kLivenessPassName && instruction->GetLifetimePosition() != kNoLifetime) {
       output_ << " (liveness: " << instruction->GetLifetimePosition();
       if (instruction->HasLiveInterval()) {
         output_ << " ";
         const LiveInterval& interval = *instruction->GetLiveInterval();
         interval.Dump(output_);
-        if (interval.HasRegister()) {
-          int reg = interval.GetRegister();
-          output_ << " ";
-          if (instruction->GetType() == Primitive::kPrimFloat
-              || instruction->GetType() == Primitive::kPrimDouble) {
-            codegen_.DumpFloatingPointRegister(output_, reg);
-          } else {
-            codegen_.DumpCoreRegister(output_, reg);
-          }
-        }
       }
       output_ << ")";
+    } else if (pass_name_ == kRegisterAllocatorPassName) {
+      LocationSummary* locations = instruction->GetLocations();
+      if (locations != nullptr) {
+        output_ << " ( ";
+        for (size_t i = 0; i < instruction->InputCount(); ++i) {
+          DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType());
+          output_ << " ";
+        }
+        output_ << ")";
+        if (locations->Out().IsValid()) {
+          output_ << " -> ";
+          DumpLocation(locations->Out(), instruction->GetType());
+        }
+      }
     }
   }
 
@@ -137,9 +175,9 @@
     }
   }
 
-  void Run(const char* pass_name) {
+  void Run() {
     StartTag("cfg");
-    PrintProperty("name", pass_name);
+    PrintProperty("name", pass_name_);
     VisitInsertionOrder();
     EndTag("cfg");
   }
@@ -188,6 +226,7 @@
 
  private:
   std::ostream& output_;
+  const char* pass_name_;
   const CodeGenerator& codegen_;
   size_t indent_;
 
@@ -209,7 +248,7 @@
   }
 
   is_enabled_ = true;
-  HGraphVisualizerPrinter printer(graph, *output_, codegen_);
+  HGraphVisualizerPrinter printer(graph, *output_, "", codegen_);
   printer.StartTag("compilation");
   printer.PrintProperty("name", pretty_name.c_str());
   printer.PrintProperty("method", pretty_name.c_str());
@@ -227,7 +266,7 @@
   }
 
   is_enabled_ = true;
-  HGraphVisualizerPrinter printer(graph, *output_, codegen_);
+  HGraphVisualizerPrinter printer(graph, *output_, "", codegen_);
   printer.StartTag("compilation");
   printer.PrintProperty("name", name);
   printer.PrintProperty("method", name);
@@ -239,8 +278,8 @@
   if (!is_enabled_) {
     return;
   }
-  HGraphVisualizerPrinter printer(graph_, *output_, codegen_);
-  printer.Run(pass_name);
+  HGraphVisualizerPrinter printer(graph_, *output_, pass_name, codegen_);
+  printer.Run();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 2638cf5..7cd74e9 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -25,6 +25,9 @@
 class DexCompilationUnit;
 class HGraph;
 
+static const char* kLivenessPassName = "liveness";
+static const char* kRegisterAllocatorPassName = "register";
+
 /**
  * If enabled, emits compilation information suitable for the c1visualizer tool
  * and IRHydra.
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index f9ae529..e4f9371 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -18,6 +18,7 @@
 
 #include "base/stringprintf.h"
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "graph_visualizer.h"
@@ -41,8 +42,11 @@
   ASSERT_NE(graph, nullptr);
 
   graph->BuildDominatorTree();
+  graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
+
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index c797497..987c5f2 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "nodes.h"
@@ -56,14 +57,16 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(8u, range->GetEnd());
+  ASSERT_EQ(9u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(1);
   ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
   ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition());
@@ -101,14 +104,15 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(22u, range->GetEnd());
+  ASSERT_EQ(23u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(3);
   ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
   ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition());
@@ -149,25 +153,26 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
-  // Test for the 0 constant.
-  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  // Test for the 4 constant.
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
-  ASSERT_EQ(2u, range->GetStart());
+  ASSERT_EQ(4u, range->GetStart());
   // Last use is the phi at the return block so instruction is live until
   // the end of the then block.
   ASSERT_EQ(18u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
-  // Test for the 4 constant.
-  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  // Test for the 0 constant.
+  interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   // The then branch is a hole for this constant, therefore its interval has 2 ranges.
   // First range starts from the definition and ends at the if block.
   range = interval->GetFirstRange();
-  ASSERT_EQ(4u, range->GetStart());
-  // 9 is the end of the if block.
+  ASSERT_EQ(2u, range->GetStart());
+  // 14 is the end of the if block.
   ASSERT_EQ(14u, range->GetEnd());
   // Second range is the else block.
   range = range->GetNext();
@@ -179,8 +184,9 @@
   // Test for the phi.
   interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
   range = interval->GetFirstRange();
+  ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(3)->GetLifetimePosition());
   ASSERT_EQ(22u, range->GetStart());
-  ASSERT_EQ(24u, range->GetEnd());
+  ASSERT_EQ(25u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
@@ -223,7 +229,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   // Test for the 0 constant.
@@ -248,7 +255,7 @@
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
   ASSERT_EQ(6u, range->GetStart());
-  ASSERT_EQ(26u, range->GetEnd());
+  ASSERT_EQ(27u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the phi.
@@ -256,7 +263,7 @@
   range = interval->GetFirstRange();
   // Instruction is consumed by the if.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(16u, range->GetEnd());
+  ASSERT_EQ(17u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 7a33620..2d0bc39 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "builder.h"
+#include "code_generator.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "nodes.h"
@@ -48,7 +49,8 @@
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
   std::ostringstream buffer;
@@ -69,17 +71,17 @@
 TEST(LivenessTest, CFG1) {
   const char* expected =
     "Block 0\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (1)\n"
     "Block 1\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n"
     "Block 2\n"
-    "  live in: ()\n"
-    "  live out: ()\n"
-    "  kill: ()\n";
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n";
 
   // Constant is not used.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3c60d3c..40a39ad 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -150,6 +150,7 @@
 
   arm::ArmManagedRegister AsArm() const;
   x86::X86ManagedRegister AsX86() const;
+  x86_64::X86_64ManagedRegister AsX86_64() const;
 
   Kind GetKind() const {
     return KindField::Decode(value_);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 752466b..2a97fad 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -35,7 +35,7 @@
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_.Get(i);
       for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
-        block->GetSuccessors().Get(j)->RemovePredecessor(block, false);
+        block->GetSuccessors().Get(j)->RemovePredecessor(block);
       }
       for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
         block->RemovePhi(it.Current()->AsPhi());
@@ -143,8 +143,7 @@
   HBasicBlock* new_block = new (arena_) HBasicBlock(this);
   AddBlock(new_block);
   new_block->AddInstruction(new (arena_) HGoto());
-  block->RemoveSuccessor(successor);
-  block->AddSuccessor(new_block);
+  block->ReplaceSuccessor(successor, new_block);
   new_block->AddSuccessor(successor);
   if (successor->IsLoopHeader()) {
     // If we split at a back edge boundary, make the new block the back edge.
@@ -168,8 +167,7 @@
     new_back_edge->AddInstruction(new (arena_) HGoto());
     for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
       HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
-      header->RemovePredecessor(back_edge);
-      back_edge->AddSuccessor(new_back_edge);
+      back_edge->ReplaceSuccessor(header, new_back_edge);
     }
     info->ClearBackEdges();
     info->AddBackEdge(new_back_edge);
@@ -190,9 +188,8 @@
     for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
       HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
       if (predecessor != back_edge) {
-        header->RemovePredecessor(predecessor);
+        predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
-        predecessor->AddSuccessor(pre_header);
       }
     }
     pre_header->AddSuccessor(header);
@@ -294,12 +291,20 @@
 void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
   DCHECK(cursor->AsPhi() == nullptr);
   DCHECK(instruction->AsPhi() == nullptr);
+  DCHECK_EQ(instruction->GetId(), -1);
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);
+  DCHECK(!instruction->IsControlFlow());
   instruction->next_ = cursor;
   instruction->previous_ = cursor->previous_;
   cursor->previous_ = instruction;
   if (GetFirstInstruction() == cursor) {
     instructions_.first_instruction_ = instruction;
+  } else {
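+    // The cursor is not the block head, so its old predecessor must be
+    // re-linked to the newly inserted instruction.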
+    instruction->previous_->next_ = instruction;
   }
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());
 }
 
 static void Add(HInstructionList* instruction_list,
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b1c8016..143d5c9 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -283,18 +283,16 @@
     block->predecessors_.Add(this);
   }
 
-  void RemovePredecessor(HBasicBlock* block, bool remove_in_successor = true) {
-    predecessors_.Delete(block);
-    if (remove_in_successor) {
-      block->successors_.Delete(this);
-    }
+  void ReplaceSuccessor(HBasicBlock* existing, HBasicBlock* new_block) {
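+    // Replace in place rather than delete-and-add so the successor index is
+    // preserved; HIf relies on successors 0 and 1 being its true/false targets.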
+    size_t successor_index = GetSuccessorIndexOf(existing);
+    DCHECK_NE(successor_index, static_cast<size_t>(-1));
+    existing->RemovePredecessor(this);
+    new_block->predecessors_.Add(this);
+    successors_.Put(successor_index, new_block);
   }
 
-  void RemoveSuccessor(HBasicBlock* block, bool remove_in_predecessor = true) {
-    successors_.Delete(block);
-    if (remove_in_predecessor) {
-      block->predecessors_.Delete(this);
-    }
+  void RemovePredecessor(HBasicBlock* block) {
+    predecessors_.Delete(block);
   }
 
   void ClearAllPredecessors() {
@@ -315,6 +313,15 @@
     return -1;
   }
 
+  size_t GetSuccessorIndexOf(HBasicBlock* successor) {
+    for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+      if (successors_.Get(i) == successor) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
@@ -455,6 +462,7 @@
   virtual void SetRawInputAt(size_t index, HInstruction* input) = 0;
 
   virtual bool NeedsEnvironment() const { return false; }
+  virtual bool IsControlFlow() const { return false; }
 
   void AddUseAt(HInstruction* user, size_t index) {
     uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_);
@@ -500,6 +508,7 @@
   void ReplaceWith(HInstruction* instruction);
 
 #define INSTRUCTION_TYPE_CHECK(type)                                           \
+  bool Is##type() { return (As##type() != nullptr); }                          \
   virtual H##type* As##type() { return nullptr; }
 
   FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
@@ -733,7 +742,9 @@
 // instruction that branches to the exit block.
 class HReturnVoid : public HTemplateInstruction<0> {
  public:
-  HReturnVoid() { }
+  HReturnVoid() {}
+
+  virtual bool IsControlFlow() const { return true; }
 
   DECLARE_INSTRUCTION(ReturnVoid);
 
@@ -749,6 +760,8 @@
     SetRawInputAt(0, value);
   }
 
+  virtual bool IsControlFlow() const { return true; }
+
   DECLARE_INSTRUCTION(Return);
 
  private:
@@ -760,7 +773,9 @@
 // exit block.
 class HExit : public HTemplateInstruction<0> {
  public:
-  HExit() { }
+  HExit() {}
+
+  virtual bool IsControlFlow() const { return true; }
 
   DECLARE_INSTRUCTION(Exit);
 
@@ -771,12 +786,14 @@
 // Jumps from one block to another.
 class HGoto : public HTemplateInstruction<0> {
  public:
-  HGoto() { }
+  HGoto() {}
 
   HBasicBlock* GetSuccessor() const {
     return GetBlock()->GetSuccessors().Get(0);
   }
 
+  virtual bool IsControlFlow() const { return true; }
+
   DECLARE_INSTRUCTION(Goto);
 
  private:
@@ -799,6 +816,8 @@
     return GetBlock()->GetSuccessors().Get(1);
   }
 
+  virtual bool IsControlFlow() const { return true; }
+
   DECLARE_INSTRUCTION(If);
 
  private:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index dfbb488..ccacbef 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -85,6 +85,8 @@
   // For testing purposes, we put a special marker on method names that should be compiled
   // with this compiler. This makes sure we're not regressing.
   bool shouldCompile = dex_compilation_unit.GetSymbol().find("00024opt_00024") != std::string::npos;
+  bool shouldOptimize =
+      dex_compilation_unit.GetSymbol().find("00024reg_00024") != std::string::npos;
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -116,7 +118,36 @@
   visualizer.DumpGraph("builder");
 
   CodeVectorAllocator allocator;
-  codegen->Compile(&allocator);
+
+  if (RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
+    graph->BuildDominatorTree();
+    graph->TransformToSSA();
+    visualizer.DumpGraph("ssa");
+
+    graph->FindNaturalLoops();
+    SsaLivenessAnalysis liveness(*graph, codegen);
+    liveness.Analyze();
+    visualizer.DumpGraph(kLivenessPassName);
+
+    RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    visualizer.DumpGraph(kRegisterAllocatorPassName);
+    codegen->CompileOptimized(&allocator);
+  } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
+    LOG(FATAL) << "Could not allocate registers in optimizing compiler";
+  } else {
+    codegen->CompileBaseline(&allocator);
+
+    // Run these phases to get some test coverage.
+    graph->BuildDominatorTree();
+    graph->TransformToSSA();
+    visualizer.DumpGraph("ssa");
+    graph->FindNaturalLoops();
+    SsaLivenessAnalysis liveness(*graph, codegen);
+    liveness.Analyze();
+    visualizer.DumpGraph(kLivenessPassName);
+  }
 
   std::vector<uint8_t> mapping_table;
   codegen->BuildMappingTable(&mapping_table);
@@ -125,19 +156,6 @@
   std::vector<uint8_t> gc_map;
   codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
 
-  // Run these phases to get some test coverage.
-  graph->BuildDominatorTree();
-  graph->TransformToSSA();
-  visualizer.DumpGraph("ssa");
-
-  graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
-  liveness.Analyze();
-  visualizer.DumpGraph("liveness");
-
-  RegisterAllocator(graph->GetArena(), *codegen).AllocateRegisters(liveness);
-  visualizer.DumpGraph("register");
-
   return new CompiledMethod(GetCompilerDriver(),
                             instruction_set,
                             allocator.GetMemory(),
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 3d2d136..cadd3c5 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -147,4 +147,64 @@
   }
 }
 
+bool ParallelMoveResolver::IsScratchLocation(Location loc) {
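+  // A location can serve as scratch if no pending move still reads it (Blocks)
+  // and some pending move overwrites it anyway, making its current value dead.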
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    if (moves_.Get(i)->Blocks(loc)) {
+      return false;
+    }
+  }
+
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    if (moves_.Get(i)->GetDestination().Equals(loc)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+                                                  int register_count,
+                                                  int if_scratch,
+                                                  bool* spilled) {
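+  // Prefer a register whose value is provably dead; only fall back to spilling
+  // the caller-provided if_scratch register when none qualifies.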
+  DCHECK_NE(blocked, if_scratch);
+  int scratch = -1;
+  for (int reg = 0; reg < register_count; ++reg) {
+    if ((blocked != reg) &&
+        IsScratchLocation(Location::RegisterLocation(ManagedRegister(reg)))) {
+      scratch = reg;
+      break;
+    }
+  }
+
+  if (scratch == -1) {
+    *spilled = true;
+    scratch = if_scratch;
+  } else {
+    *spilled = false;
+  }
+
+  return scratch;
+}
+
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+    ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+    : resolver_(resolver),
+      reg_(kNoRegister),
+      spilled_(false) {
+  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
+
+  if (spilled_) {
+    resolver->SpillScratch(reg_);
+  }
+}
+
+ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+  if (spilled_) {
+    resolver_->RestoreScratch(reg_);
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index ff20cb0..fcc1de6 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -23,6 +23,7 @@
 namespace art {
 
 class HParallelMove;
+class Location;
 class MoveOperands;
 
 /**
@@ -39,15 +40,40 @@
   void EmitNativeCode(HParallelMove* parallel_move);
 
  protected:
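+  // RAII helper that reserves a scratch register for the scope's lifetime,
+  // spilling and restoring one around the scope if no register is free.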
+  class ScratchRegisterScope : public ValueObject {
+   public:
+    ScratchRegisterScope(ParallelMoveResolver* resolver,
+                         int blocked,
+                         int if_scratch,
+                         int number_of_registers);
+    ~ScratchRegisterScope();
+
+    int GetRegister() const { return reg_; }
+    bool IsSpilled() const { return spilled_; }
+
+   private:
+    ParallelMoveResolver* resolver_;
+    int reg_;
+    bool spilled_;
+  };
+
+  bool IsScratchLocation(Location loc);
+  int AllocateScratchRegister(int blocked, int register_count, int if_scratch, bool* spilled);
+
   // Emit a move.
   virtual void EmitMove(size_t index) = 0;
 
   // Execute a move by emitting a swap of two operands.
   virtual void EmitSwap(size_t index) = 0;
 
+  virtual void SpillScratch(int reg) = 0;
+  virtual void RestoreScratch(int reg) = 0;
+
   // List of moves not yet resolved.
   GrowableArray<MoveOperands*> moves_;
 
+  static constexpr int kNoRegister = -1;
+
  private:
   // Build the initial list of moves.
   void BuildInitialMoveList(HParallelMove* parallel_move);
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 88df24d..093856d 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -50,6 +50,9 @@
              << ")";
   }
 
+  virtual void SpillScratch(int reg) {}
+  virtual void RestoreScratch(int reg) {}
+
   std::string GetMessage() const {
     return  message_.str();
   }
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index a7727c0..2c8166e 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
 #define ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
 
+#include "base/stringprintf.h"
 #include "nodes.h"
 
 namespace art {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index dd175d2..348e9d4 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -22,67 +22,206 @@
 namespace art {
 
 static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
 
-RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen)
+RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
+                                     CodeGenerator* codegen,
+                                     const SsaLivenessAnalysis& liveness)
       : allocator_(allocator),
         codegen_(codegen),
+        liveness_(liveness),
         unhandled_(allocator, 0),
         handled_(allocator, 0),
         active_(allocator, 0),
         inactive_(allocator, 0),
+        physical_register_intervals_(allocator, codegen->GetNumberOfRegisters()),
+        spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         processing_core_registers_(false),
         number_of_registers_(-1),
         registers_array_(nullptr),
-        blocked_registers_(allocator->AllocArray<bool>(codegen.GetNumberOfRegisters())) {
-  codegen.SetupBlockedRegisters(blocked_registers_);
+        blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())) {
+  codegen->SetupBlockedRegisters(blocked_registers_);
+  physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters());
 }
 
-static bool ShouldProcess(bool processing_core_registers, HInstruction* instruction) {
-  bool is_core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
+bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph,
+                                                InstructionSet instruction_set) {
+  if (!Supports(instruction_set)) {
+    return false;
+  }
+  for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
+    for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions());
+         !it.Done();
+         it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->NeedsEnvironment()) return false;
+      if (current->GetType() == Primitive::kPrimLong) return false;
+      if (current->GetType() == Primitive::kPrimFloat) return false;
+      if (current->GetType() == Primitive::kPrimDouble) return false;
+    }
+  }
+  return true;
+}
+
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+      && (interval->GetType() != Primitive::kPrimFloat);
   return processing_core_registers == is_core_register;
 }
 
-void RegisterAllocator::AllocateRegistersInternal(const SsaLivenessAnalysis& liveness) {
+void RegisterAllocator::AllocateRegisters() {
+  processing_core_registers_ = true;
+  AllocateRegistersInternal();
+  processing_core_registers_ = false;
+  AllocateRegistersInternal();
+
+  Resolve();
+
+  if (kIsDebugBuild) {
+    processing_core_registers_ = true;
+    ValidateInternal(true);
+    processing_core_registers_ = false;
+    ValidateInternal(true);
+  }
+}
+
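+// Makes the physical register in `location` unavailable over [start, end) by
+// extending, or creating on first use, the fixed interval for that register.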
+void RegisterAllocator::BlockRegister(Location location,
+                                      size_t start,
+                                      size_t end,
+                                      Primitive::Type type) {
+  int reg = location.reg().RegId();
+  LiveInterval* interval = physical_register_intervals_.Get(reg);
+  if (interval == nullptr) {
+    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+    physical_register_intervals_.Put(reg, interval);
+    inactive_.Add(interval);
+  }
+  DCHECK(interval->GetRegister() == reg);
+  interval->AddRange(start, end);
+}
+
+void RegisterAllocator::AllocateRegistersInternal() {
   number_of_registers_ = processing_core_registers_
-      ? codegen_.GetNumberOfCoreRegisters()
-      : codegen_.GetNumberOfFloatingPointRegisters();
+      ? codegen_->GetNumberOfCoreRegisters()
+      : codegen_->GetNumberOfFloatingPointRegisters();
 
   registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
 
   // Iterate post-order, to ensure the list is sorted, and the last added interval
   // is the one with the lowest start position.
-  for (size_t i = liveness.GetNumberOfSsaValues(); i > 0; --i) {
-    HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i - 1);
-    if (ShouldProcess(processing_core_registers_, instruction)) {
-      LiveInterval* current = instruction->GetLiveInterval();
+  for (size_t i = liveness_.GetNumberOfSsaValues(); i > 0; --i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i - 1);
+    LiveInterval* current = instruction->GetLiveInterval();
+    if (ShouldProcess(processing_core_registers_, current)) {
       DCHECK(unhandled_.IsEmpty() || current->StartsBefore(unhandled_.Peek()));
-      unhandled_.Add(current);
+
+      LocationSummary* locations = instruction->GetLocations();
+      if (locations->GetTempCount() != 0) {
+        // Note that we already filtered out instructions requiring temporaries in
+        // RegisterAllocator::CanAllocateRegistersFor.
+        LOG(FATAL) << "Unimplemented";
+      }
+
+      // Some instructions define their output in a fixed register or stack slot.
+      // We need to know these locations before doing register allocation. For a
+      // given register, we create an interval that covers these locations; the
+      // register is then unavailable at these locations when allocating a
+      // register for another interval.
+      //
+      // The backward walk ensures the ranges are ordered by increasing start position.
+      Location output = locations->Out();
+      size_t position = instruction->GetLifetimePosition();
+      if (output.IsRegister()) {
+        // Shift the interval's start by one to account for the blocked register.
+        current->SetFrom(position + 1);
+        current->SetRegister(output.reg().RegId());
+        BlockRegister(output, position, position + 1, instruction->GetType());
+      } else if (output.IsStackSlot()) {
+        current->SetSpillSlot(output.GetStackIndex());
+      }
+      for (size_t i = 0; i < instruction->InputCount(); ++i) {
+        Location input = locations->InAt(i);
+        if (input.IsRegister()) {
+          BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType());
+        }
+      }
+
+      // Add the interval to the correct list.
+      if (current->HasRegister()) {
+        DCHECK(instruction->IsParameterValue());
+        inactive_.Add(current);
+      } else if (current->HasSpillSlot()) {
+        DCHECK(instruction->IsParameterValue());
+        // Split before first register use.
+        size_t first_register_use = current->FirstRegisterUse();
+        if (first_register_use != kNoLifetime) {
+          LiveInterval* split = Split(current, first_register_use - 1);
+          // The new interval may start at a later position than intervals
+          // already in the list, so insert it at its sorted position.
+          AddToUnhandled(split);
+        } else {
+          // Nothing to do, we won't allocate a register for this value.
+        }
+      } else {
+        DCHECK(unhandled_.IsEmpty() || current->StartsBefore(unhandled_.Peek()));
+        unhandled_.Add(current);
+      }
     }
   }
 
   LinearScan();
-  if (kIsDebugBuild) {
-    ValidateInternal(liveness, true);
-  }
 }
 
-bool RegisterAllocator::ValidateInternal(const SsaLivenessAnalysis& liveness,
-                                         bool log_fatal_on_failure) const {
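+// Walks all the live ranges of an interval and of its split siblings, in
+// increasing position order.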
+class AllRangesIterator : public ValueObject {
+ public:
+  explicit AllRangesIterator(LiveInterval* interval)
+      : current_interval_(interval),
+        current_range_(interval->GetFirstRange()) {}
+
+  bool Done() const { return current_interval_ == nullptr; }
+  LiveRange* CurrentRange() const { return current_range_; }
+  LiveInterval* CurrentInterval() const { return current_interval_; }
+
+  void Advance() {
+    current_range_ = current_range_->GetNext();
+    if (current_range_ == nullptr) {
+      current_interval_ = current_interval_->GetNextSibling();
+      if (current_interval_ != nullptr) {
+        current_range_ = current_interval_->GetFirstRange();
+      }
+    }
+  }
+
+ private:
+  LiveInterval* current_interval_;
+  LiveRange* current_range_;
+
+  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
   // To simplify unit testing, we eagerly create the array of intervals, and
   // call the helper method.
   GrowableArray<LiveInterval*> intervals(allocator_, 0);
-  for (size_t i = 0; i < liveness.GetNumberOfSsaValues(); ++i) {
-    HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i);
-    if (ShouldProcess(processing_core_registers_, instruction)) {
+  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
       intervals.Add(instruction->GetLiveInterval());
     }
   }
-  return ValidateIntervals(intervals, codegen_, allocator_, processing_core_registers_,
-                           log_fatal_on_failure);
+
+  for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) {
+    LiveInterval* fixed = physical_register_intervals_.Get(i);
+    if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) {
+      intervals.Add(fixed);
+    }
+  }
+
+  return ValidateIntervals(intervals, spill_slots_.Size(), *codegen_, allocator_,
+                           processing_core_registers_, log_fatal_on_failure);
 }
 
-bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ranges,
+bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+                                          size_t number_of_spill_slots,
                                           const CodeGenerator& codegen,
                                           ArenaAllocator* allocator,
                                           bool processing_core_registers,
@@ -90,25 +229,43 @@
   size_t number_of_registers = processing_core_registers
       ? codegen.GetNumberOfCoreRegisters()
       : codegen.GetNumberOfFloatingPointRegisters();
-  GrowableArray<ArenaBitVector*> bit_vectors(allocator, number_of_registers);
+  GrowableArray<ArenaBitVector*> liveness_of_values(
+      allocator, number_of_registers + number_of_spill_slots);
 
   // Allocate a bit vector per register. A live interval that has a register
   // allocated will populate the associated bit vector based on its live ranges.
-  for (size_t i = 0; i < number_of_registers; i++) {
-    bit_vectors.Add(new (allocator) ArenaBitVector(allocator, 0, true));
+  for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
+    liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true));
   }
 
-  for (size_t i = 0, e = ranges.Size(); i < e; ++i) {
-    LiveInterval* current = ranges.Get(i);
-    do {
-      if (!current->HasRegister()) {
-        continue;
+  for (size_t i = 0, e = intervals.Size(); i < e; ++i) {
+    for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) {
+      LiveInterval* current = it.CurrentInterval();
+      HInstruction* defined_by = current->GetParent()->GetDefinedBy();
+      if (current->GetParent()->HasSpillSlot()
+           // Parameters have their own stack slot.
+           && !(defined_by != nullptr && defined_by->IsParameterValue())) {
+        BitVector* liveness_of_spill_slot = liveness_of_values.Get(
+            number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize);
+        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+          if (liveness_of_spill_slot->IsBitSet(j)) {
+            if (log_fatal_on_failure) {
+              std::ostringstream message;
+              message << "Spill slot conflict at " << j;
+              LOG(FATAL) << message.str();
+            } else {
+              return false;
+            }
+          } else {
+            liveness_of_spill_slot->SetBit(j);
+          }
+        }
       }
-      BitVector* vector = bit_vectors.Get(current->GetRegister());
-      LiveRange* range = current->GetFirstRange();
-      do {
-        for (size_t j = range->GetStart(); j < range->GetEnd(); ++j) {
-          if (vector->IsBitSet(j)) {
+
+      if (current->HasRegister()) {
+        BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
+        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+          if (liveness_of_register->IsBitSet(j)) {
             if (log_fatal_on_failure) {
               std::ostringstream message;
               message << "Register conflict at " << j << " for ";
@@ -122,23 +279,23 @@
               return false;
             }
           } else {
-            vector->SetBit(j);
+            liveness_of_register->SetBit(j);
           }
         }
-      } while ((range = range->GetNext()) != nullptr);
-    } while ((current = current->GetNextSibling()) != nullptr);
+      }
+    }
   }
   return true;
 }
 
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) {
+void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
   interval->Dump(stream);
   stream << ": ";
   if (interval->HasRegister()) {
     if (processing_core_registers_) {
-      codegen_.DumpCoreRegister(stream, interval->GetRegister());
+      codegen_->DumpCoreRegister(stream, interval->GetRegister());
     } else {
-      codegen_.DumpFloatingPointRegister(stream, interval->GetRegister());
+      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
     }
   } else {
     stream << "spilled";
@@ -151,6 +308,7 @@
   while (!unhandled_.IsEmpty()) {
     // (1) Remove interval with the lowest start position from unhandled.
     LiveInterval* current = unhandled_.Pop();
+    DCHECK(!current->IsFixed() && !current->HasRegister() && !current->HasSpillSlot());
     size_t position = current->GetStart();
 
     // (2) Remove currently active intervals that are dead at this position.
@@ -210,13 +368,6 @@
     free_until[i] = kMaxLifetimePosition;
   }
 
-  // For each active interval, set its register to not free.
-  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
-    LiveInterval* interval = active_.Get(i);
-    DCHECK(interval->HasRegister());
-    free_until[interval->GetRegister()] = 0;
-  }
-
   // For each inactive interval, set its register to be free until
   // the next intersection with `current`.
   // Thanks to SSA, this should only be needed for intervals
@@ -230,6 +381,13 @@
     }
   }
 
+  // For each active interval, set its register to not free.
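+  // This runs after the inactive loop so that a register held by an active
+  // interval stays unavailable even if a fixed inactive interval set a
+  // non-zero free-until position for it above.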
+  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
+    LiveInterval* interval = active_.Get(i);
+    DCHECK(interval->HasRegister());
+    free_until[interval->GetRegister()] = 0;
+  }
+
   // Pick the register that is free the longest.
   int reg = -1;
   for (size_t i = 0; i < number_of_registers_; ++i) {
@@ -270,7 +428,7 @@
 bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
   size_t first_register_use = current->FirstRegisterUse();
   if (current->FirstRegisterUse() == kNoLifetime) {
-    // TODO: Allocate spill slot for `current`.
+    AllocateSpillSlotFor(current);
     return false;
   }
 
@@ -285,9 +443,13 @@
   for (size_t i = 0, e = active_.Size(); i < e; ++i) {
     LiveInterval* active = active_.Get(i);
     DCHECK(active->HasRegister());
-    size_t use = active->FirstRegisterUseAfter(current->GetStart());
-    if (use != kNoLifetime) {
-      next_use[active->GetRegister()] = use;
+    if (active->IsFixed()) {
+      next_use[active->GetRegister()] = current->GetStart();
+    } else {
+      size_t use = active->FirstRegisterUseAfter(current->GetStart());
+      if (use != kNoLifetime) {
+        next_use[active->GetRegister()] = use;
+      }
     }
   }
 
@@ -298,9 +460,17 @@
   for (size_t i = 0, e = inactive_.Size(); i < e; ++i) {
     LiveInterval* inactive = inactive_.Get(i);
     DCHECK(inactive->HasRegister());
-    size_t use = inactive->FirstRegisterUseAfter(current->GetStart());
-    if (use != kNoLifetime) {
-      next_use[inactive->GetRegister()] = use;
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      if (inactive->IsFixed()) {
+        next_use[inactive->GetRegister()] =
+            std::min(next_intersection, next_use[inactive->GetRegister()]);
+      } else {
+        size_t use = inactive->FirstRegisterUseAfter(current->GetStart());
+        if (use != kNoLifetime) {
+          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+        }
+      }
     }
   }
 
@@ -317,6 +487,7 @@
   if (first_register_use >= next_use[reg]) {
     // If the first use of that instruction is after the last use of the found
     // register, we split this interval just before its first register use.
+    AllocateSpillSlotFor(current);
     LiveInterval* split = Split(current, first_register_use - 1);
     AddToUnhandled(split);
     return false;
@@ -328,6 +499,7 @@
     for (size_t i = 0, e = active_.Size(); i < e; ++i) {
       LiveInterval* active = active_.Get(i);
       if (active->GetRegister() == reg) {
+        DCHECK(!active->IsFixed());
         LiveInterval* split = Split(active, current->GetStart());
         active_.DeleteAt(i);
         handled_.Add(active);
@@ -339,11 +511,19 @@
     for (size_t i = 0; i < inactive_.Size(); ++i) {
       LiveInterval* inactive = inactive_.Get(i);
       if (inactive->GetRegister() == reg) {
-        LiveInterval* split = Split(inactive, current->GetStart());
-        inactive_.DeleteAt(i);
-        handled_.Add(inactive);
-        AddToUnhandled(split);
-        --i;
+        size_t next_intersection = inactive->FirstIntersectionWith(current);
+        if (next_intersection != kNoLifetime) {
+          if (inactive->IsFixed()) {
+            LiveInterval* split = Split(current, next_intersection);
+            AddToUnhandled(split);
+          } else {
+            LiveInterval* split = Split(inactive, current->GetStart());
+            inactive_.DeleteAt(i);
+            handled_.Add(inactive);
+            AddToUnhandled(split);
+            --i;
+          }
+        }
       }
     }
 
@@ -352,13 +532,15 @@
 }
 
 void RegisterAllocator::AddToUnhandled(LiveInterval* interval) {
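+  // `unhandled_` keeps the interval with the lowest start position last, so an
+  // interval starting after all existing entries belongs at index 0.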
+  size_t insert_at = 0;
   for (size_t i = unhandled_.Size(); i > 0; --i) {
     LiveInterval* current = unhandled_.Get(i - 1);
     if (current->StartsAfter(interval)) {
-      unhandled_.InsertAt(i, interval);
+      insert_at = i;
       break;
     }
   }
+  unhandled_.InsertAt(insert_at, interval);
 }
 
 LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
@@ -370,9 +552,371 @@
     return interval;
   } else {
     LiveInterval* new_interval = interval->SplitAt(position);
-    // TODO: Allocate spill slot for `interval`.
     return new_interval;
   }
 }
 
+void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+  LiveInterval* parent = interval->GetParent();
+
+  // An instruction gets a spill slot for its entire lifetime. If the parent
+  // of this interval already has a spill slot, there is nothing to do.
+  if (parent->HasSpillSlot()) {
+    return;
+  }
+
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (defined_by->IsParameterValue()) {
+    // Parameters have their own stack slot.
+    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    return;
+  }
+
+  LiveInterval* last_sibling = interval;
+  while (last_sibling->GetNextSibling() != nullptr) {
+    last_sibling = last_sibling->GetNextSibling();
+  }
+  size_t end = last_sibling->GetEnd();
+
+  // Find an available spill slot.
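+  // Each entry in `spill_slots_` records the lifetime end of the interval
+  // currently assigned to that slot; the slot is free for reuse once that
+  // position is at or before the start of the new interval.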
+  size_t slot = 0;
+  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
+    if (spill_slots_.Get(slot) <= parent->GetStart()) {
+      break;
+    }
+  }
+
+  if (slot == spill_slots_.Size()) {
+    // We need a new spill slot.
+    spill_slots_.Add(end);
+  } else {
+    spill_slots_.Put(slot, end);
+  }
+
+  parent->SetSpillSlot(slot * kVRegSize);
+}
+
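+// Returns the allocated location of `interval`: its register when it has one,
+// otherwise the spill slot of its parent.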
+static Location ConvertToLocation(LiveInterval* interval) {
+  if (interval->HasRegister()) {
+    return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
+  } else {
+    DCHECK(interval->GetParent()->HasSpillSlot());
+    return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+  }
+}
+
+// We create a special marker for input moves to differentiate them from
+// moves created during resolution. They must be different instructions
+// because the input moves work on the assumption that the interval moves
+// have been executed.
+static constexpr size_t kInputMoveLifetimePosition = 0;
+static bool IsInputMove(HInstruction* instruction) {
+  return instruction->GetLifetimePosition() == kInputMoveLifetimePosition;
+}
+
+void RegisterAllocator::AddInputMoveFor(HInstruction* instruction,
+                                        Location source,
+                                        Location destination) const {
+  if (source.Equals(destination)) return;
+
+  DCHECK(instruction->AsPhi() == nullptr);
+
+  HInstruction* previous = instruction->GetPrevious();
+  HParallelMove* move = nullptr;
+  if (previous == nullptr
+      || previous->AsParallelMove() == nullptr
+      || !IsInputMove(previous)) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(kInputMoveLifetimePosition);
+    instruction->GetBlock()->InsertInstructionBefore(move, instruction);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  DCHECK(IsInputMove(move));
+  move->AddMove(new (allocator_) MoveOperands(source, destination));
+}
+
+void RegisterAllocator::InsertParallelMoveAt(size_t position,
+                                             Location source,
+                                             Location destination) const {
+  if (source.Equals(destination)) return;
+
+  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+  if (at == nullptr) {
+    // Block boundary: don't do anything, the connection of split siblings will handle it.
+    return;
+  }
+  HParallelMove* move;
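+  // Instructions sit at even lifetime positions; an odd position therefore
+  // refers to the gap after the instruction at `position / 2`.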
+  if ((position & 1) == 1) {
+    // Move must happen after the instruction.
+    DCHECK(!at->IsControlFlow());
+    move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need
+    // to differentiate it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() != position) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+    }
+  } else {
+    // Move must happen before the instruction.
+    HInstruction* previous = at->GetPrevious();
+    if (previous != nullptr && previous->AsParallelMove() != nullptr) {
+      // This is a parallel move for connecting siblings in the same block. We need
+      // to differentiate it from moves for connecting blocks, and from input moves.
+      if (previous->GetLifetimePosition() != position) {
+        previous = previous->GetPrevious();
+      }
+    }
+    if (previous == nullptr || previous->AsParallelMove() == nullptr) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at);
+    } else {
+      move = previous->AsParallelMove();
+    }
+  }
+  move->AddMove(new (allocator_) MoveOperands(source, destination));
+}
+
+void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                                   Location source,
+                                                   Location destination) const {
+  if (source.Equals(destination)) return;
+
+  DCHECK_EQ(block->GetSuccessors().Size(), 1u);
+  HInstruction* last = block->GetLastInstruction();
+  HInstruction* previous = last->GetPrevious();
+  HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to differentiate it
+  // from moves for connecting siblings in the same block, and from output moves.
+  if (previous == nullptr || previous->AsParallelMove() == nullptr
+      || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(block->GetLifetimeEnd());
+    block->InsertInstructionBefore(move, last);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  move->AddMove(new (allocator_) MoveOperands(source, destination));
+}
+
+void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                                    Location source,
+                                                    Location destination) const {
+  if (source.Equals(destination)) return;
+
+  HInstruction* first = block->GetFirstInstruction();
+  HParallelMove* move = first->AsParallelMove();
+  // This is a parallel move for connecting blocks. We need to differentiate it
+  // from moves for connecting siblings in the same block, and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(block->GetLifetimeStart());
+    block->InsertInstructionBefore(move, first);
+  }
+  move->AddMove(new (allocator_) MoveOperands(source, destination));
+}
+
+void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
+                                        Location source,
+                                        Location destination) const {
+  if (source.Equals(destination)) return;
+
+  if (instruction->AsPhi() != nullptr) {
+    InsertParallelMoveAtEntryOf(instruction->GetBlock(), source, destination);
+    return;
+  }
+
+  size_t position = instruction->GetLifetimePosition() + 1;
+  HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need to
+  // differentiate it from input moves, moves for connecting siblings in the same
+  // block, and moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+  }
+  move->AddMove(new (allocator_) MoveOperands(source, destination));
+}
+
+void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
+  LiveInterval* current = interval;
+  if (current->HasSpillSlot() && current->HasRegister()) {
+    // We spill eagerly, so move must be at definition.
+    InsertMoveAfter(interval->GetDefinedBy(),
+                    Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
+                    Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+  }
+  UsePosition* use = current->GetFirstUse();
+
+  // Walk over all siblings, updating locations of use positions, and
+  // connecting them when they are adjacent.
+  do {
+    Location source = ConvertToLocation(current);
+
+    // Walk over all uses covered by this interval, and update the location
+    // information.
+    while (use != nullptr && use->GetPosition() <= current->GetEnd()) {
+      if (!use->GetIsEnvironment()) {
+        LocationSummary* locations = use->GetUser()->GetLocations();
+        Location expected_location = locations->InAt(use->GetInputIndex());
+        if (expected_location.IsUnallocated()) {
+          locations->SetInAt(use->GetInputIndex(), source);
+        } else {
+          AddInputMoveFor(use->GetUser(), source, expected_location);
+        }
+      }
+      use = use->GetNext();
+    }
+
+    // If the next interval starts just after this one, and has a register,
+    // insert a move.
+    LiveInterval* next_sibling = current->GetNextSibling();
+    if (next_sibling != nullptr
+        && next_sibling->HasRegister()
+        && current->GetEnd() == next_sibling->GetStart()) {
+      Location destination = ConvertToLocation(next_sibling);
+      InsertParallelMoveAt(current->GetEnd(), source, destination);
+    }
+    current = next_sibling;
+  } while (current != nullptr);
+  DCHECK(use == nullptr);
+}
+
+void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
+                                             HBasicBlock* from,
+                                             HBasicBlock* to) const {
+  if (interval->GetNextSibling() == nullptr) {
+    // Nothing to connect. The whole range was allocated to the same location.
+    return;
+  }
+
+  size_t from_position = from->GetLifetimeEnd() - 1;
+  size_t to_position = to->GetLifetimeStart();
+
+  LiveInterval* destination = nullptr;
+  LiveInterval* source = nullptr;
+
+  LiveInterval* current = interval;
+
+  // Check the intervals that cover `from` and `to`.
+  while ((current != nullptr) && (source == nullptr || destination == nullptr)) {
+    if (current->Covers(from_position)) {
+      DCHECK(source == nullptr);
+      source = current;
+    }
+    if (current->Covers(to_position)) {
+      DCHECK(destination == nullptr);
+      destination = current;
+    }
+
+    current = current->GetNextSibling();
+  }
+
+  if (destination == source) {
+    // Interval was not split.
+    return;
+  }
+
+  if (!destination->HasRegister()) {
+    // Values are eagerly spilled, so the spill slot already contains the
+    // appropriate value.
+    return;
+  }
+
+  // If `from` has only one successor, we can put the moves at its exit.
+  // Otherwise we need to put them at the entry of `to`.
+  if (from->GetSuccessors().Size() == 1) {
+    InsertParallelMoveAtExitOf(from, ConvertToLocation(source), ConvertToLocation(destination));
+  } else {
+    DCHECK_EQ(to->GetPredecessors().Size(), 1u);
+    InsertParallelMoveAtEntryOf(to, ConvertToLocation(source), ConvertToLocation(destination));
+  }
+}
+
+// Returns the location of `interval`, or of one of its siblings, at `position`.
+static Location FindLocationAt(LiveInterval* interval, size_t position) {
+  LiveInterval* current = interval;
+  while (!current->Covers(position)) {
+    current = current->GetNextSibling();
+    DCHECK(current != nullptr);
+  }
+  return ConvertToLocation(current);
+}
+
+void RegisterAllocator::Resolve() {
+  codegen_->ComputeFrameSize(spill_slots_.Size());
+
+  // Adjust the Out Location of instructions.
+  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    LiveInterval* current = instruction->GetLiveInterval();
+    LocationSummary* locations = instruction->GetLocations();
+    Location location = locations->Out();
+    if (instruction->AsParameterValue() != nullptr) {
+      // Now that we know the frame size, adjust the parameter's location.
+      if (location.IsStackSlot()) {
+        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->SetOut(location);
+      } else if (location.IsDoubleStackSlot()) {
+        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->SetOut(location);
+      } else if (current->HasSpillSlot()) {
+        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+      }
+    }
+
+    Location source = ConvertToLocation(current);
+
+    if (location.IsUnallocated()) {
+      if (location.GetPolicy() == Location::kSameAsFirstInput) {
+        locations->SetInAt(0, source);
+      }
+      locations->SetOut(source);
+    } else {
+      DCHECK(source.Equals(location));
+    }
+  }
+
+  // Connect siblings.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    ConnectSiblings(instruction->GetLiveInterval());
+  }
+
+  // Resolve non-linear control flow across branches. Order does not matter.
+  for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    BitVector* live = liveness_.GetLiveInSet(*block);
+    for (uint32_t idx : live->Indexes()) {
+      HInstruction* current = liveness_.GetInstructionFromSsaIndex(idx);
+      LiveInterval* interval = current->GetLiveInterval();
+      for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+        ConnectSplitSiblings(interval, block->GetPredecessors().Get(i), block);
+      }
+    }
+  }
+
+  // Resolve phi inputs. Order does not matter.
+  for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+    HBasicBlock* current = it.Current();
+    for (HInstructionIterator it(current->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* phi = it.Current();
+      for (size_t i = 0, e = current->GetPredecessors().Size(); i < e; ++i) {
+        HBasicBlock* predecessor = current->GetPredecessors().Get(i);
+        DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u);
+        HInstruction* input = phi->InputAt(i);
+        Location source = FindLocationAt(input->GetLiveInterval(),
+                                         predecessor->GetLastInstruction()->GetLifetimePosition());
+        Location destination = ConvertToLocation(phi->GetLiveInterval());
+        InsertParallelMoveAtExitOf(predecessor, source, destination);
+      }
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e575b96..e63122f 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -23,7 +23,12 @@
 namespace art {
 
 class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
 class LiveInterval;
+class Location;
 class SsaLivenessAnalysis;
 
 /**
@@ -31,40 +36,48 @@
  */
 class RegisterAllocator {
  public:
-  RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen);
+  RegisterAllocator(ArenaAllocator* allocator,
+                    CodeGenerator* codegen,
+                    const SsaLivenessAnalysis& analysis);
 
   // Main entry point for the register allocator. Given the liveness analysis,
   // allocates registers to live intervals.
-  void AllocateRegisters(const SsaLivenessAnalysis& liveness) {
-    processing_core_registers_ = true;
-    AllocateRegistersInternal(liveness);
-    processing_core_registers_ = false;
-    AllocateRegistersInternal(liveness);
-  }
+  void AllocateRegisters();
 
   // Validate that the register allocator did not allocate the same register to
   // intervals that intersect each other. Returns false if it did not.
-  bool Validate(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) {
+  bool Validate(bool log_fatal_on_failure) {
     processing_core_registers_ = true;
-    if (!ValidateInternal(liveness, log_fatal_on_failure)) {
+    if (!ValidateInternal(log_fatal_on_failure)) {
       return false;
     }
     processing_core_registers_ = false;
-    return ValidateInternal(liveness, log_fatal_on_failure);
+    return ValidateInternal(log_fatal_on_failure);
   }
 
   // Helper method for validation. Used by unit testing.
   static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+                                size_t number_of_spill_slots,
                                 const CodeGenerator& codegen,
                                 ArenaAllocator* allocator,
                                 bool processing_core_registers,
                                 bool log_fatal_on_failure);
 
+  static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
+  static bool Supports(InstructionSet instruction_set) {
+    return instruction_set == kX86 || instruction_set == kArm || instruction_set == kX86_64;
+  }
+
+  size_t GetNumberOfSpillSlots() const {
+    return spill_slots_.Size();
+  }
+
  private:
   // Main methods of the allocator.
   void LinearScan();
   bool TryAllocateFreeReg(LiveInterval* interval);
   bool AllocateBlockedReg(LiveInterval* interval);
+  void Resolve();
 
   // Add `interval` in the sorted list of unhandled intervals.
   void AddToUnhandled(LiveInterval* interval);
@@ -75,13 +88,33 @@
   // Returns whether `reg` is blocked by the code generator.
   bool IsBlocked(int reg) const;
 
+  // Update the interval for the register in `location` to cover [start, end).
+  void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type);
+
+  // Allocate a spill slot for the given interval.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
+  // Connect adjacent siblings within blocks.
+  void ConnectSiblings(LiveInterval* interval);
+
+  // Connect siblings between block entries and exits.
+  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+  // Helper methods to insert parallel moves in the graph.
+  void InsertParallelMoveAtExitOf(HBasicBlock* block, Location source, Location destination) const;
+  void InsertParallelMoveAtEntryOf(HBasicBlock* block, Location source, Location destination) const;
+  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+  void AddInputMoveFor(HInstruction* instruction, Location source, Location destination) const;
+  void InsertParallelMoveAt(size_t position, Location source, Location destination) const;
+
   // Helper methods.
-  void AllocateRegistersInternal(const SsaLivenessAnalysis& liveness);
-  bool ValidateInternal(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) const;
-  void DumpInterval(std::ostream& stream, LiveInterval* interval);
+  void AllocateRegistersInternal();
+  bool ValidateInternal(bool log_fatal_on_failure) const;
+  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
 
   ArenaAllocator* const allocator_;
-  const CodeGenerator& codegen_;
+  CodeGenerator* const codegen_;
+  const SsaLivenessAnalysis& liveness_;
 
   // List of intervals that must be processed, ordered by start position. Last entry
   // is the interval that has the lowest start position.
@@ -98,6 +131,13 @@
   // That is, they have a lifetime hole that spans the start of the new interval.
   GrowableArray<LiveInterval*> inactive_;
 
+  // Fixed intervals for physical registers. Such an interval covers the positions
+  // where an instruction requires a specific register.
+  GrowableArray<LiveInterval*> physical_register_intervals_;
+
+  // The spill slots allocated for live intervals.
+  GrowableArray<size_t> spill_slots_;
+
   // True if processing core registers. False if processing floating
   // point registers.
   bool processing_core_registers_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 019d0f8..bfabc5a 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -40,12 +40,12 @@
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   graph->FindNaturalLoops();
-  SsaLivenessAnalysis liveness(*graph);
-  liveness.Analyze();
   CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86);
-  RegisterAllocator register_allocator(&allocator, *codegen);
-  register_allocator.AllocateRegisters(liveness);
-  return register_allocator.Validate(liveness, false);
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  liveness.Analyze();
+  RegisterAllocator register_allocator(&allocator, codegen, liveness);
+  register_allocator.AllocateRegisters();
+  return register_allocator.Validate(false);
 }
 
 /**
@@ -64,10 +64,12 @@
     static constexpr size_t ranges[][2] = {{0, 42}};
     intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0));
     intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -77,10 +79,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -90,10 +94,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -103,10 +109,12 @@
     intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 47}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
     intervals.Reset();
   }
 
@@ -117,14 +125,17 @@
     intervals.Get(0)->SplitAt(43);
     static constexpr size_t ranges2[][2] = {{42, 47}};
     intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(1)->SetRegister(0);
     // Sibling of the first interval has no register allocated to it.
-    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
 
     intervals.Get(0)->GetNextSibling()->SetRegister(0);
-    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false));
+    ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
+        intervals, 0, *codegen, &allocator, true, false));
   }
 }
 
@@ -286,12 +297,12 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = BuildSSAGraph(data, &allocator);
-  SsaLivenessAnalysis liveness(*graph);
-  liveness.Analyze();
   CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86);
-  RegisterAllocator register_allocator(&allocator, *codegen);
-  register_allocator.AllocateRegisters(liveness);
-  ASSERT_TRUE(register_allocator.Validate(liveness, false));
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  liveness.Analyze();
+  RegisterAllocator register_allocator(&allocator, codegen, liveness);
+  register_allocator.AllocateRegisters();
+  ASSERT_TRUE(register_allocator.Validate(false));
 
   HBasicBlock* loop_header = graph->GetBlocks().Get(2);
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -303,7 +314,7 @@
   ASSERT_NE(phi_interval->GetRegister(), loop_update->GetRegister());
 
   HBasicBlock* return_block = graph->GetBlocks().Get(3);
-  HReturn* ret = return_block->GetFirstInstruction()->AsReturn();
+  HReturn* ret = return_block->GetLastInstruction()->AsReturn();
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 1284a97..471307e 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -15,22 +15,12 @@
  */
 
 #include "ssa_builder.h"
+
 #include "nodes.h"
+#include "ssa_type_propagation.h"
 
 namespace art {
 
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
-  // We trust the verifier has already done the necessary checking.
-  switch (existing) {
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimNot:
-      return existing;
-    default:
-      return new_type;
-  }
-}
-
 void SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block visited
   // (with the exception of loops) in order to create the right environment for that
@@ -44,18 +34,18 @@
     HBasicBlock* block = loop_headers_.Get(i);
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      Primitive::Type type = Primitive::kPrimVoid;
       for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) {
         HInstruction* input = ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber());
         phi->AddInput(input);
-        type = MergeTypes(type, input->GetType());
       }
-      phi->SetType(type);
     }
   }
-  // TODO: Now that the type of loop phis is set, we need a type propagation phase.
 
-  // 3) Clear locals.
+  // 3) Propagate types of phis.
+  SsaTypePropagation type_propagation(GetGraph());
+  type_propagation.Run();
+
+  // 4) Clear locals.
   // TODO: Move this to a dead code eliminator phase.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
@@ -102,8 +92,8 @@
       for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
         HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local);
         if (current == nullptr) {
-//          one_predecessor_has_no_value = true;
-//          break;
+          one_predecessor_has_no_value = true;
+          break;
         } else if (current != value) {
           is_different = true;
         }
@@ -118,16 +108,10 @@
       if (is_different) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
-        Primitive::Type type = Primitive::kPrimVoid;
         for (size_t i = 0; i < block->GetPredecessors().Size(); i++) {
           HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(i), local);
-          // We need to merge the incoming types, as the Dex format does not
-          // guarantee the inputs have the same type. In particular the 0 constant is
-          // used for all types, but the graph builder treats it as an int.
-          type = MergeTypes(type, value->GetType());
           phi->SetRawInputAt(i, value);
         }
-        phi->SetType(type);
         block->AddPhi(phi);
         value = phi;
       }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index dc4b2e5..50ea00f 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -15,6 +15,8 @@
  */
 
 #include "ssa_liveness_analysis.h"
+
+#include "code_generator.h"
 #include "nodes.h"
 
 namespace art {
@@ -80,38 +82,6 @@
   order->Add(block);
 }
 
-class HLinearOrderIterator : public ValueObject {
- public:
-  explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
-      : post_order_(post_order), index_(post_order.Size()) {}
-
-  bool Done() const { return index_ == 0; }
-  HBasicBlock* Current() const { return post_order_.Get(index_ -1); }
-  void Advance() { --index_; DCHECK_GE(index_, 0U); }
-
- private:
-  const GrowableArray<HBasicBlock*>& post_order_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
-};
-
-class HLinearPostOrderIterator : public ValueObject {
- public:
-  explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
-      : post_order_(post_order), index_(0) {}
-
-  bool Done() const { return index_ == post_order_.Size(); }
-  HBasicBlock* Current() const { return post_order_.Get(index_); }
-  void Advance() { ++index_; }
-
- private:
-  const GrowableArray<HBasicBlock*>& post_order_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
-};
-
 void SsaLivenessAnalysis::LinearizeGraph() {
   // For simplicity of the implementation, we create post linear order. The order for
   // computing live ranges is the reverse of that order.
@@ -131,30 +101,38 @@
   // to differentiate between the start and end of an instruction. Adding 2 to
   // the lifetime position for each instruction ensures the start of an
   // instruction is different than the end of the previous instruction.
-  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block->SetLifetimeStart(lifetime_position);
-    lifetime_position += 2;
 
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
-      if (current->HasUses()) {
+      current->Accept(codegen_->GetLocationBuilder());
+      LocationSummary* locations = current->GetLocations();
+      if (locations != nullptr && locations->Out().IsValid()) {
         instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
-            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType()));
+            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current));
       }
       current->SetLifetimePosition(lifetime_position);
     }
+    lifetime_position += 2;
+
+    // Add a null marker to signal the start of a new block.
+    instructions_from_lifetime_position_.Add(nullptr);
 
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
-      if (current->HasUses()) {
+      current->Accept(codegen_->GetLocationBuilder());
+      LocationSummary* locations = current->GetLocations();
+      if (locations != nullptr && locations->Out().IsValid()) {
         instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
-            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType()));
+            new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current));
       }
+      instructions_from_lifetime_position_.Add(current);
       current->SetLifetimePosition(lifetime_position);
       lifetime_position += 2;
     }
@@ -165,7 +143,7 @@
 }
 
 void SsaLivenessAnalysis::ComputeLiveness() {
-  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block_infos_.Put(
         block->GetBlockId(),
@@ -186,7 +164,7 @@
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
-  for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+  for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
 
     BitVector* kill = GetKillSet(*block);
@@ -201,7 +179,7 @@
       for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) {
         HInstruction* phi = it.Current();
         HInstruction* input = phi->InputAt(phi_input_index);
-        input->GetLiveInterval()->AddPhiUse(phi, block);
+        input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block);
         // A phi input whose last user is the phi dies at the end of the predecessor block,
         // and not at the phi's lifetime position.
         live_in->SetBit(input->GetSsaIndex());
@@ -228,7 +206,7 @@
         HInstruction* input = current->InputAt(i);
         DCHECK(input->HasSsaIndex());
         live_in->SetBit(input->GetSsaIndex());
-        input->GetLiveInterval()->AddUse(current);
+        input->GetLiveInterval()->AddUse(current, i, false);
       }
 
       if (current->HasEnvironment()) {
@@ -239,7 +217,7 @@
           if (instruction != nullptr) {
             DCHECK(instruction->HasSsaIndex());
             live_in->SetBit(instruction->GetSsaIndex());
-            instruction->GetLiveInterval()->AddUse(current);
+            instruction->GetLiveInterval()->AddUse(current, i, true);
           }
         }
       }
@@ -251,6 +229,10 @@
       if (current->HasSsaIndex()) {
         kill->SetBit(current->GetSsaIndex());
         live_in->ClearBit(current->GetSsaIndex());
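+        // The definition trims the interval: uses recorded earlier may have
+        // conservatively extended its first range back to the block start.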
+        LiveInterval* interval = current->GetLiveInterval();
+        DCHECK((interval->GetFirstRange() == nullptr)
+               || (interval->GetStart() == current->GetLifetimePosition()));
+        interval->SetFrom(current->GetLifetimePosition());
       }
     }
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 4d56e1f..fc3eb66 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+class CodeGenerator;
+
 class BlockInfo : public ArenaObject {
  public:
   BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
@@ -87,9 +89,17 @@
  */
 class UsePosition : public ArenaObject {
  public:
-  UsePosition(HInstruction* user, size_t position, UsePosition* next)
-      : user_(user), position_(position), next_(next) {
-    DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition());
+  UsePosition(HInstruction* user,
+              size_t input_index,
+              bool is_environment,
+              size_t position,
+              UsePosition* next)
+      : user_(user),
+        input_index_(input_index),
+        is_environment_(is_environment),
+        position_(position),
+        next_(next) {
+    DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition() + 1);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
@@ -99,12 +109,18 @@
 
   HInstruction* GetUser() const { return user_; }
 
-  void Dump(std::ostream& stream) {
+  bool GetIsEnvironment() const { return is_environment_; }
+
+  size_t GetInputIndex() const { return input_index_; }
+
+  void Dump(std::ostream& stream) const {
     stream << position_;
   }
 
  private:
   HInstruction* const user_;
+  const size_t input_index_;
+  const bool is_environment_;
   const size_t position_;
   UsePosition* const next_;
 
@@ -117,17 +133,33 @@
  */
 class LiveInterval : public ArenaObject {
  public:
-  LiveInterval(ArenaAllocator* allocator, Primitive::Type type)
+  LiveInterval(ArenaAllocator* allocator, Primitive::Type type, HInstruction* defined_by = nullptr)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
         first_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
-        register_(kNoRegister) {}
+        parent_(this),
+        register_(kNoRegister),
+        spill_slot_(kNoSpillSlot),
+        is_fixed_(false),
+        defined_by_(defined_by) {}
 
-  void AddUse(HInstruction* instruction) {
-    size_t position = instruction->GetLifetimePosition();
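+  // Builds a fixed interval: one pre-assigned to a physical register, used to
+  // make that register unavailable over the positions it covers. Fixed
+  // intervals are never split or spilled.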
+  static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
+    LiveInterval* interval = new (allocator) LiveInterval(allocator, type);
+    interval->SetRegister(reg);
+    interval->is_fixed_ = true;
+    return interval;
+  }
+
+  bool IsFixed() const { return is_fixed_; }
+
+  void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
+    // Set the use within the instruction.
+    // TODO: Use the instruction's location to know whether the instruction can die
+    // at entry, or needs to stay alive within the user.
+    size_t position = instruction->GetLifetimePosition() + 1;
     size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
     size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd();
     if (first_range_ == nullptr) {
@@ -140,15 +172,18 @@
       // Last use is in the following block.
       first_range_->start_ = start_block_position;
     } else {
+      DCHECK(first_range_->GetStart() > position);
       // There is a hole in the interval. Create a new range.
       first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_);
     }
-    first_use_ = new (allocator_) UsePosition(instruction, position, first_use_);
+    first_use_ = new (allocator_) UsePosition(
+        instruction, input_index, is_environment, position, first_use_);
   }
 
-  void AddPhiUse(HInstruction* instruction, HBasicBlock* block) {
+  void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->AsPhi() != nullptr);
-    first_use_ = new (allocator_) UsePosition(instruction, block->GetLifetimeEnd(), first_use_);
+    first_use_ = new (allocator_) UsePosition(
+        instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
   }
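
The `GetLifetimePosition() + 1` convention above records an input use one position after its user, so an input stays live across the user's own position. Below is a simplified standalone model of the backward range building in AddUse(); the first two branch conditions are assumptions (only the lifetime-hole case is fully visible in this hunk), and the names are illustrative, not ART API.

    #include <cstddef>

    struct Range {
      size_t start, end;
      Range* next;
    };

    // Liveness walks blocks in reverse, so a use at `pos` inside
    // [block_start, block_end) either opens the very first range,
    // stretches a range that began in the following block, or starts
    // a new range across a lifetime hole.
    Range* AddUseModel(Range* first, size_t pos,
                       size_t block_start, size_t block_end) {
      if (first == nullptr) {
        return new Range{block_start, pos, nullptr};  // First use ever seen.
      }
      if (first->start == block_end) {
        first->start = block_start;  // The last use was in the following block.
        return first;
      }
      return new Range{block_start, pos, first};  // Hole: prepend a new range.
    }
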
 
   void AddRange(size_t start, size_t end) {
@@ -158,6 +193,7 @@
       // There is a use in the following block.
       first_range_->start_ = start;
     } else {
+      DCHECK(first_range_->GetStart() > end);
       // There is a hole in the interval. Create a new range.
       first_range_ = new (allocator_) LiveRange(start, end, first_range_);
     }
@@ -178,11 +214,23 @@
     }
   }
 
+  bool HasSpillSlot() const { return spill_slot_ != kNoSpillSlot; }
+  void SetSpillSlot(int slot) { spill_slot_ = slot; }
+  int GetSpillSlot() const { return spill_slot_; }
+
   void SetFrom(size_t from) {
-    DCHECK(first_range_ != nullptr);
-    first_range_->start_ = from;
+    if (first_range_ != nullptr) {
+      first_range_->start_ = from;
+    } else {
+      // Instruction without uses.
+      DCHECK(!defined_by_->HasUses());
+      DCHECK(from == defined_by_->GetLifetimePosition());
+      first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr);
+    }
   }
 
+  LiveInterval* GetParent() const { return parent_; }
+
   LiveRange* GetFirstRange() const { return first_range_; }
 
   int GetRegister() const { return register_; }
@@ -190,11 +238,11 @@
   void ClearRegister() { register_ = kNoRegister; }
   bool HasRegister() const { return register_ != kNoRegister; }
 
-  bool IsDeadAt(size_t position) {
+  bool IsDeadAt(size_t position) const {
     return last_range_->GetEnd() <= position;
   }
 
-  bool Covers(size_t position) {
+  bool Covers(size_t position) const {
     LiveRange* current = first_range_;
     while (current != nullptr) {
       if (position >= current->GetStart() && position < current->GetEnd()) {
@@ -208,27 +256,10 @@
   /**
    * Returns the first intersection of this interval with `other`.
    */
-  size_t FirstIntersectionWith(LiveInterval* other) {
-    // We only call this method if there is a lifetime hole in this interval
-    // at the start of `other`.
-    DCHECK(!Covers(other->GetStart()));
-    DCHECK_LE(GetStart(), other->GetStart());
-    // Move to the range in this interval that starts after the other interval.
-    size_t other_start = other->GetStart();
-    LiveRange* my_range = first_range_;
-    while (my_range != nullptr) {
-      if (my_range->GetStart() >= other_start) {
-        break;
-      } else {
-        my_range = my_range->GetNext();
-      }
-    }
-    if (my_range == nullptr) {
-      return kNoLifetime;
-    }
-
+  size_t FirstIntersectionWith(LiveInterval* other) const {
     // Advance both intervals and find the first matching range start in
     // this interval.
+    LiveRange* my_range = first_range_;
     LiveRange* other_range = other->first_range_;
     do {
       if (my_range->IntersectsWith(*other_range)) {
@@ -252,16 +283,33 @@
     return first_range_->GetStart();
   }
 
+  size_t GetEnd() const {
+    return last_range_->GetEnd();
+  }
+
   size_t FirstRegisterUseAfter(size_t position) const {
+    if (position == GetStart() && defined_by_ != nullptr) {
+      Location location = defined_by_->GetLocations()->Out();
+      // This interval is the first interval of the instruction. If the output
+      // of the instruction requires a register, we return the position of that instruction
+      // as the first register use.
+      if (location.IsUnallocated()) {
+        if ((location.GetPolicy() == Location::kRequiresRegister)
+             || (location.GetPolicy() == Location::kSameAsFirstInput
+                && defined_by_->GetLocations()->InAt(0).GetPolicy() == Location::kRequiresRegister)) {
+          return position;
+        }
+      }
+    }
+
     UsePosition* use = first_use_;
     while (use != nullptr) {
       size_t use_position = use->GetPosition();
-      // TODO: Once we plug the Locations builder of the code generator
-      // to the register allocator, this method must be adjusted. We
-      // test if there is an environment, because these are currently the only
-      // instructions that could have more uses than the number of registers.
-      if (use_position >= position && !use->GetUser()->NeedsEnvironment()) {
-        return use_position;
+      if (use_position >= position && !use->GetIsEnvironment()) {
+        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
+        if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+          return use_position;
+        }
       }
       use = use->GetNext();
     }
@@ -272,10 +320,18 @@
     return FirstRegisterUseAfter(GetStart());
   }
 
+  UsePosition* GetFirstUse() const {
+    return first_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
 
+  HInstruction* GetDefinedBy() const {
+    return defined_by_;
+  }
+
   /**
    * Split this interval at `position`. This interval is changed to:
    * [start ... position).
@@ -284,7 +340,7 @@
    * [position ... end)
    */
   LiveInterval* SplitAt(size_t position) {
-    DCHECK(next_sibling_ == nullptr);
+    DCHECK(!is_fixed_);
     DCHECK_GT(position, GetStart());
 
     if (last_range_->GetEnd() <= position) {
@@ -293,7 +349,9 @@
     }
 
     LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_);
+    new_interval->next_sibling_ = next_sibling_;
     next_sibling_ = new_interval;
+    new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
     LiveRange* current = first_range_;
@@ -383,21 +441,36 @@
   // Live interval that is the result of a split.
   LiveInterval* next_sibling_;
 
+  // The first interval, from which split intervals originate.
+  LiveInterval* parent_;
+
   // The register allocated to this interval.
   int register_;
 
+  // The spill slot allocated to this interval.
+  int spill_slot_;
+
+  // Whether the interval is for a fixed register.
+  bool is_fixed_;
+
+  // The instruction represented by this interval.
+  HInstruction* const defined_by_;
+
   static constexpr int kNoRegister = -1;
+  static constexpr int kNoSpillSlot = -1;
 
   DISALLOW_COPY_AND_ASSIGN(LiveInterval);
 };
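
SplitAt() now threads every piece of a split interval into a forward sibling chain whose members all share one parent_, which is what lets later passes (e.g. spill-slot lookup via GetParent()) treat the original interval as the canonical one. A minimal standalone sketch of that invariant, with illustrative names rather than ART code:

    #include <cassert>

    struct Piece {
      Piece* next_sibling = nullptr;
      Piece* parent = this;

      Piece* SplitOff() {
        Piece* p = new Piece();
        p->next_sibling = next_sibling;  // Keep any existing chain intact.
        next_sibling = p;
        p->parent = parent;              // Every piece points at the original.
        return p;
      }
    };

    int main() {
      Piece a;
      Piece* c = a.SplitOff();  // a -> c
      Piece* b = a.SplitOff();  // a -> b -> c: splitting in the middle.
      assert(a.next_sibling == b && b->next_sibling == c);
      assert(b->parent == &a && c->parent == &a);
      return 0;  // Pieces are deliberately leaked in this sketch.
    }
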
 
 class SsaLivenessAnalysis : public ValueObject {
  public:
-  explicit SsaLivenessAnalysis(const HGraph& graph)
+  SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen)
       : graph_(graph),
+        codegen_(codegen),
         linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
         block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
         instructions_from_ssa_index_(graph.GetArena(), 0),
+        instructions_from_lifetime_position_(graph.GetArena(), 0),
         number_of_ssa_values_(0) {
     block_infos_.SetSize(graph.GetBlocks().Size());
   }
@@ -424,6 +497,14 @@
     return instructions_from_ssa_index_.Get(index);
   }
 
+  HInstruction* GetInstructionFromPosition(size_t index) const {
+    return instructions_from_lifetime_position_.Get(index);
+  }
+
+  size_t GetMaxLifetimePosition() const {
+    return instructions_from_lifetime_position_.Size() * 2 - 1;
+  }
+
   size_t GetNumberOfSsaValues() const {
     return number_of_ssa_values_;
   }
@@ -458,14 +539,52 @@
   bool UpdateLiveOut(const HBasicBlock& block);
 
   const HGraph& graph_;
+  CodeGenerator* const codegen_;
   GrowableArray<HBasicBlock*> linear_post_order_;
   GrowableArray<BlockInfo*> block_infos_;
+
+  // Temporary array used when computing live_in, live_out, and kill sets.
   GrowableArray<HInstruction*> instructions_from_ssa_index_;
+
+  // Temporary array used when inserting moves in the graph.
+  GrowableArray<HInstruction*> instructions_from_lifetime_position_;
   size_t number_of_ssa_values_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
 };
 
+class HLinearOrderIterator : public ValueObject {
+ public:
+  explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness)
+      : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {}
+
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return post_order_.Get(index_ - 1); }
+  void Advance() { --index_; DCHECK_GE(index_, 0U); }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
+class HLinearPostOrderIterator : public ValueObject {
+ public:
+  explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness)
+      : post_order_(liveness.GetLinearPostOrder()), index_(0) {}
+
+  bool Done() const { return index_ == post_order_.Size(); }
+  HBasicBlock* Current() const { return post_order_.Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
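
The two iterators above walk the same GetLinearPostOrder() array from opposite ends: HLinearOrderIterator yields blocks first-to-last, HLinearPostOrderIterator last-to-first. A hedged usage sketch; `liveness` is assumed to be a fully computed SsaLivenessAnalysis and the loop bodies are placeholders:

    for (HLinearOrderIterator it(liveness); !it.Done(); it.Advance()) {
      HBasicBlock* block = it.Current();
      // ... forward pass, e.g. numbering lifetime positions ...
    }
    for (HLinearPostOrderIterator it(liveness); !it.Done(); it.Advance()) {
      HBasicBlock* block = it.Current();
      // ... backward pass, e.g. computing live-in/live-out sets ...
    }
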
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 485ea27..088a5c4 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -87,6 +87,13 @@
   graph->TransformToSSA();
   ReNumberInstructions(graph);
 
+  // Test that phis had their type set.
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    for (HInstructionIterator it(graph->GetBlocks().Get(i)->GetPhis()); !it.Done(); it.Advance()) {
+      ASSERT_NE(it.Current()->GetType(), Primitive::kPrimVoid);
+    }
+  }
+
   SsaPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
 
@@ -99,7 +106,7 @@
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [2, 2]\n"
     "  1: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 2, 5\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
     "  2: Equal(0, 0) [3]\n"
     "  3: If(2)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
@@ -129,7 +136,7 @@
     "  0: IntConstant 0 [6, 3, 3]\n"
     "  1: IntConstant 4 [6]\n"
     "  2: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 2, 5\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
     "  3: Equal(0, 0) [4]\n"
     "  4: If(3)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
@@ -409,7 +416,7 @@
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 5, succ: 3, 8\n"
+    "BasicBlock 2, pred: 1, 5, succ: 8, 3\n"
     "  5: Phi(0, 1) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
@@ -467,7 +474,7 @@
     "  0: IntConstant 0 [3, 3]\n"
     "  1: IntConstant 4\n"
     "  2: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 2, 5\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
     "  3: Equal(0, 0) [4]\n"
     "  4: If(3)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
@@ -489,4 +496,43 @@
   TestCode(data, expected);
 }
 
+TEST(SsaTest, MultiplePredecessors) {
+  // Test that we do not create a phi when one predecessor
+  // does not update the local.
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n"
+    "  1: Goto\n"
+    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "  2: Equal(0, 0) [3]\n"
+    "  3: If(2)\n"
+    "BasicBlock 2, pred: 1, succ: 5\n"
+    "  4: Add(0, 0)\n"
+    "  5: Goto\n"
+    "BasicBlock 3, pred: 1, succ: 7, 4\n"
+    "  6: Equal(0, 0) [7]\n"
+    "  7: If(6)\n"
+    "BasicBlock 4, pred: 3, succ: 5\n"
+    "  8: Add(0, 0)\n"
+    "  9: Goto\n"
+    // This block should not get a phi for local 1.
+    "BasicBlock 5, pred: 2, 4, 7, succ: 6\n"
+    "  10: ReturnVoid\n"
+    "BasicBlock 6, pred: 5\n"
+    "  11: Exit\n"
+    "BasicBlock 7, pred: 3, succ: 5\n"
+    "  12: Goto\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 5,
+    Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8,
+    Instruction::GOTO | 0x0500,
+    Instruction::IF_EQ, 4,
+    Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
new file mode 100644
index 0000000..53fa74e
--- /dev/null
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ssa_type_propagation.h"
+
+#include "nodes.h"
+
+namespace art {
+
+static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
+  // We trust the verifier has already done the necessary checking.
+  switch (existing) {
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimNot:
+      return existing;
+    default:
+      return new_type;
+  }
+}
+
+// Re-compute and update the type of the instruction. Returns
+// whether or not the type was changed.
+static bool UpdateType(HPhi* phi) {
+  Primitive::Type existing = phi->GetType();
+
+  Primitive::Type new_type = Primitive::kPrimVoid;
+  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+    Primitive::Type input_type = phi->InputAt(i)->GetType();
+    new_type = MergeTypes(new_type, input_type);
+  }
+  phi->SetType(new_type);
+  return existing != new_type;
+}
+
+void SsaTypePropagation::Run() {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+  ProcessWorklist();
+}
+
+void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) {
+  if (block->IsLoopHeader()) {
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      // Set the initial type for the phi. Using the non-back-edge input reaches
+      // a fixed point faster.
+      phi->SetType(phi->InputAt(0)->GetType());
+      AddToWorklist(phi);
+    }
+  } else {
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      if (UpdateType(phi)) {
+        AddDependentInstructionsToWorklist(phi);
+      }
+    }
+  }
+}
+
+void SsaTypePropagation::ProcessWorklist() {
+  while (!worklist_.IsEmpty()) {
+    HPhi* instruction = worklist_.Pop();
+    if (UpdateType(instruction)) {
+      AddDependentInstructionsToWorklist(instruction);
+    }
+  }
+}
+
+void SsaTypePropagation::AddToWorklist(HPhi* instruction) {
+  worklist_.Add(instruction);
+}
+
+void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
+  for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->GetUser()->AsPhi();
+    if (phi != nullptr) {
+      AddToWorklist(phi);
+    }
+  }
+}
+
+}  // namespace art
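
MergeTypes() above is a two-level lattice: float, double, and reference types are sticky, and any other existing type is overwritten by the incoming one. A standalone worked example of the same merge rule:

    #include <cassert>

    enum Type { kVoid, kInt, kFloat, kDouble, kNot /* reference */ };

    Type Merge(Type existing, Type new_type) {
      switch (existing) {
        case kFloat:
        case kDouble:
        case kNot:
          return existing;  // Sticky: never downgraded.
        default:
          return new_type;
      }
    }

    int main() {
      Type t = kVoid;
      t = Merge(t, kInt);    assert(t == kInt);    // Void takes any type.
      t = Merge(t, kFloat);  assert(t == kFloat);  // Int upgraded to float.
      t = Merge(t, kInt);    assert(t == kFloat);  // Float wins thereafter.
      return 0;
    }
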
diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h
new file mode 100644
index 0000000..5f471a9
--- /dev/null
+++ b/compiler/optimizing/ssa_type_propagation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
+#define ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Compute and propagate types of phis in the graph.
+class SsaTypePropagation : public ValueObject {
+ public:
+  explicit SsaTypePropagation(HGraph* graph)
+      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+
+  void Run();
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block);
+  void ProcessWorklist();
+  void AddToWorklist(HPhi* phi);
+  void AddDependentInstructionsToWorklist(HPhi* phi);
+
+  HGraph* const graph_;
+  GrowableArray<HPhi*> worklist_;
+
+  static constexpr size_t kDefaultWorklistSize = 8;
+
+  DISALLOW_COPY_AND_ASSIGN(SsaTypePropagation);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_
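
A hedged usage sketch for the pass: it is assumed to run on an HGraph already in SSA form, before liveness analysis consumes the phi types.

    // `graph` is an HGraph* after TransformToSSA().
    SsaTypePropagation type_propagation(graph);
    type_propagation.Run();  // Every phi now carries a merged, non-void type.
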
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
index ca4635d..da49524 100644
--- a/compiler/utils/arena_allocator.cc
+++ b/compiler/utils/arena_allocator.cc
@@ -32,10 +32,11 @@
 constexpr size_t Arena::kDefaultSize;
 
 template <bool kCount>
-const char* ArenaAllocatorStatsImpl<kCount>::kAllocNames[kNumArenaAllocKinds] = {
+const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
   "Misc       ",
   "BasicBlock ",
   "LIR        ",
+  "LIR masks  ",
   "MIR        ",
   "DataFlow   ",
   "GrowList   ",
@@ -101,6 +102,7 @@
        << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n";
   }
   os << "===== Allocation by kind\n";
+  COMPILE_ASSERT(arraysize(kAllocNames) == kNumArenaAllocKinds, check_arraysize_kAllocNames);
   for (int i = 0; i < kNumArenaAllocKinds; i++) {
       os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n";
   }
@@ -139,7 +141,7 @@
     if (kUseMemSet || !kUseMemMap) {
       memset(Begin(), 0, bytes_allocated_);
     } else {
-      madvise(Begin(), bytes_allocated_, MADV_DONTNEED);
+      map_->MadviseDontNeedAndZero();
     }
     bytes_allocated_ = 0;
   }
@@ -215,7 +217,7 @@
 }
 
 void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + 3 + kValgrindRedZoneBytes) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
   if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
     // Obtain a new block.
     ObtainNewArenaForAllocation(rounded_bytes);
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index dbe482d..f4bcb1d 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -41,6 +41,7 @@
   kArenaAllocMisc,
   kArenaAllocBB,
   kArenaAllocLIR,
+  kArenaAllocLIRResourceMask,
   kArenaAllocMIR,
   kArenaAllocDFInfo,
   kArenaAllocGrowableArray,
@@ -92,7 +93,7 @@
   // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL.
   size_t alloc_stats_[kNumArenaAllocKinds];  // Bytes used by various allocation kinds.
 
-  static const char* kAllocNames[kNumArenaAllocKinds];
+  static const char* const kAllocNames[];
 };
 
 typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats;
@@ -156,7 +157,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = RoundUp(bytes, 4);
+    bytes = RoundUp(bytes, 8);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
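
The alignment bump from 4 to 8 bytes in these allocators guarantees naturally aligned results for 8-byte types on 64-bit hosts. RoundUp is assumed to be the usual power-of-two rounding from ART's utils; a standalone sketch of its effect:

    #include <cassert>
    #include <cstddef>

    constexpr size_t RoundUp(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);  // n must be a power of two.
    }

    int main() {
      assert(RoundUp(1, 8) == 8);
      assert(RoundUp(8, 8) == 8);
      assert(RoundUp(12, 8) == 16);  // Previously RoundUp(12, 4) == 12.
      return 0;
    }
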
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index b8b0e6e..aeb2f76 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index c090062..37799cb 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = RoundUp(bytes, 4);
+    size_t rounded_bytes = RoundUp(bytes, 8);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index a14551c..41d1529 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -138,8 +138,8 @@
 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
-  EmitUint8(0x89);
-  EmitRegisterOperand(src.LowBits(), dst.LowBits());
+  EmitUint8(0x8B);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
 }
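
Both 0x89 and 0x8B encode a register-to-register movl; they differ in whether ModRM.reg names the source (0x89) or the destination (0x8B). The old pairing computed the REX prefix for (dst, src) while the ModRM operands were (src, dst), so the extension bits landed on the wrong registers whenever exactly one operand needed them. A hand-worked encoding for exposition, assuming EmitOptionalRex32 maps its first argument to REX.R and its second to REX.B:

    // movl %r11d, %eax  (dst = RAX, src = R11):
    //   New 0x8B form: REX 0x41 (B extends rm = R11), 8B, ModRM 0xC3
    //                  (reg = EAX, rm = R11)  ->  41 8B C3, correct.
    //   Old 0x89 form: same REX 0x41, but ModRM 0xD8 put src in reg and
    //                  dst in rm, so 41 89 D8 decodes as movl %ebx, %r8d
    //                  -- the wrong registers.
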
 
 
@@ -821,6 +821,15 @@
   EmitRegisterOperand(dst.LowBits(), src.LowBits());
 }
 
+
+void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x87);
+  EmitOperand(dst.LowBits(), Operand(src));
+}
+
+
 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
@@ -1650,7 +1659,7 @@
     pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
   }
   // return address then method on stack
-  addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
+  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) +
                                    sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
                                    kFramePointerSize /*return address*/));
 
@@ -1682,7 +1691,7 @@
 void X86_64Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
+  addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
   }
@@ -1691,7 +1700,7 @@
 
 void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(-adjust));
+  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
 }
 
 void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
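
The static_cast<int64_t> added at these call sites matters because Immediate takes an int64_t while frame_size and adjust are size_t: negating the unsigned value first wraps it modulo 2^N, and on a 32-bit host the wrapped value survives the widening to 64 bits. A standalone sketch:

    #include <cassert>
    #include <cstdint>
    #include <cstddef>

    int main() {
      size_t adjust = 16;
      int64_t wrong = -adjust;  // 32-bit host: 4294967280; value depends on sizeof(size_t).
      int64_t right = -static_cast<int64_t>(adjust);  // Always -16.
      assert(right == -16);
      (void)wrong;
      return 0;
    }
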
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 548d379..9aa5a54 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -29,6 +29,13 @@
 namespace art {
 namespace x86_64 {
 
+// Encodes an immediate value for operands.
+//
+// Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
+// to 32b.
+//
+// Note: As we support cross-compilation, the value type must be int64_t. Be aware of the
+// conversion rules in expressions regarding negation, especially for size_t on 32-bit hosts.
 class Immediate {
  public:
   explicit Immediate(int64_t value) : value_(value) {}
@@ -368,6 +375,7 @@
   void fptan();
 
   void xchgl(CpuRegister dst, CpuRegister src);
+  void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 7201d04..f7bad8b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -125,6 +125,16 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
 }
 
+TEST_F(AssemblerX86_64Test, Movl) {
+  GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11));
+  GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11));
+  const char* expected =
+    "movl %R11d, %R8d\n"
+    "movl %R11d, %EAX\n";
+
+  DriverStr(expected, "movl");
+}
+
 
 std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) {
   // From Condition
@@ -200,4 +210,122 @@
   DriverFn(&setcc_test_fn, "setcc");
 }
 
+static x86_64::X86_64ManagedRegister ManagedFromCpu(x86_64::Register r) {
+  return x86_64::X86_64ManagedRegister::FromCpuRegister(r);
+}
+
+static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) {
+  return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
+}
+
+std::string buildframe_test_fn(x86_64::X86_64Assembler* assembler) {
+  // TODO: more interesting spill registers / entry spills.
+
+  // Two random spill regs.
+  std::vector<ManagedRegister> spill_regs;
+  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
+  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+
+  // Three random entry spills.
+  ManagedRegisterEntrySpills entry_spills;
+  ManagedRegisterSpill spill(ManagedFromCpu(x86_64::RAX), 8, 0);
+  entry_spills.push_back(spill);
+  ManagedRegisterSpill spill2(ManagedFromCpu(x86_64::RBX), 8, 8);
+  entry_spills.push_back(spill2);
+  ManagedRegisterSpill spill3(ManagedFromFpu(x86_64::XMM1), 8, 16);
+  entry_spills.push_back(spill3);
+
+  x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
+
+  size_t frame_size = 10 * kStackAlignment;
+  assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
+
+  // Construct assembly text counterpart.
+  std::ostringstream str;
+  // 1) Push the spill_regs.
+  str << "pushq %rsi\n";
+  str << "pushq %r10\n";
+  // 2) Move down the stack pointer.
+  ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 +
+      sizeof(StackReference<mirror::ArtMethod>) + 8;
+  str << "addq $" << displacement << ", %rsp\n";
+  // 3) Make space for method reference, and store it.
+  str << "subq $4, %rsp\n";
+  str << "movl %edi, (%rsp)\n";
+  // 4) Entry spills.
+  str << "movq %rax, " << frame_size + 0 << "(%rsp)\n";
+  str << "movq %rbx, " << frame_size + 8 << "(%rsp)\n";
+  str << "movsd %xmm1, " << frame_size + 16 << "(%rsp)\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, BuildFrame) {
+  DriverFn(&buildframe_test_fn, "BuildFrame");
+}
+
+std::string removeframe_test_fn(x86_64::X86_64Assembler* assembler) {
+  // TODO: more interesting spill registers / entry spills.
+
+  // Two random spill regs.
+  std::vector<ManagedRegister> spill_regs;
+  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
+  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+
+  size_t frame_size = 10 * kStackAlignment;
+  assembler->RemoveFrame(frame_size, spill_regs);
+
+  // Construct assembly text counterpart.
+  std::ostringstream str;
+  // 1) Move up the stack pointer.
+  ssize_t displacement = static_cast<ssize_t>(frame_size) - spill_regs.size() * 8 - 8;
+  str << "addq $" << displacement << ", %rsp\n";
+  // 2) Pop spill regs.
+  str << "popq %r10\n";
+  str << "popq %rsi\n";
+  str << "ret\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RemoveFrame) {
+  DriverFn(&removeframe_test_fn, "RemoveFrame");
+}
+
+std::string increaseframe_test_fn(x86_64::X86_64Assembler* assembler) {
+  assembler->IncreaseFrameSize(0U);
+  assembler->IncreaseFrameSize(kStackAlignment);
+  assembler->IncreaseFrameSize(10 * kStackAlignment);
+
+  // Construct assembly text counterpart.
+  std::ostringstream str;
+  str << "addq $0, %rsp\n";
+  str << "addq $-" << kStackAlignment << ", %rsp\n";
+  str << "addq $-" << 10 * kStackAlignment << ", %rsp\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, IncreaseFrame) {
+  DriverFn(&increaseframe_test_fn, "IncreaseFrame");
+}
+
+std::string decreaseframe_test_fn(x86_64::X86_64Assembler* assembler) {
+  assembler->DecreaseFrameSize(0U);
+  assembler->DecreaseFrameSize(kStackAlignment);
+  assembler->DecreaseFrameSize(10 * kStackAlignment);
+
+  // Construct assembly text counterpart.
+  std::ostringstream str;
+  str << "addq $0, %rsp\n";
+  str << "addq $" << kStackAlignment << ", %rsp\n";
+  str << "addq $" << 10 * kStackAlignment << ", %rsp\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, DecreaseFrame) {
+  DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 58a0379..ca9eae3 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -30,6 +30,7 @@
 class CpuRegister {
  public:
   explicit CpuRegister(Register r) : reg_(r) {}
+  explicit CpuRegister(int r) : reg_(Register(r)) {}
   Register AsRegister() const {
     return reg_;
   }
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 03d32f0..31fcd17 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -38,7 +38,6 @@
 
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 
-ifeq ($(WITH_HOST_DALVIK),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
 LOCAL_MODULE_TAGS := optional
@@ -54,4 +53,3 @@
 include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
-endif
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index c17788e..28db711 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -36,12 +36,10 @@
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler,art/compiler,target,debug,$(dex2oat_arch)))
 endif
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 35149cf..67ac729 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -203,6 +203,10 @@
   UsageError("");
   UsageError("  --dump-timing: display a breakdown of where time was spent");
   UsageError("");
+  UsageError("  --include-debug-symbols: Include ELF symbols in this oat file");
+  UsageError("");
+  UsageError("  --no-include-debug-symbols: Do not include ELF symbols in this oat file");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
@@ -816,6 +820,7 @@
   bool dump_stats = false;
   bool dump_timing = false;
   bool dump_passes = false;
+  bool include_debug_symbols = kIsDebugBuild;
   bool dump_slow_timing = kIsDebugBuild;
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
@@ -969,6 +974,10 @@
       dump_passes = true;
     } else if (option == "--dump-stats") {
       dump_stats = true;
+    } else if (option == "--include-debug-symbols" || option == "--no-strip-symbols") {
+      include_debug_symbols = true;
+    } else if (option == "--no-include-debug-symbols" || option == "--strip-symbols") {
+      include_debug_symbols = false;
     } else if (option.starts_with("--profile-file=")) {
       profile_file = option.substr(strlen("--profile-file=")).data();
       VLOG(compiler) << "dex2oat: profile file is " << profile_file;
@@ -1084,7 +1093,7 @@
   }
 
   if (compiler_filter_string == nullptr) {
-    if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) {
+    if ((instruction_set == kX86_64 && image) || instruction_set == kArm64 || instruction_set == kMips) {
       // TODO: implement/fix compilers for these architectures.
       compiler_filter_string = "interpret-only";
     } else if (image) {
@@ -1122,7 +1131,8 @@
                                    tiny_method_threshold,
                                    num_dex_methods_threshold,
                                    generate_gdb_information,
-                                   top_k_profile_threshold
+                                   top_k_profile_threshold,
+                                   include_debug_symbols
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
@@ -1409,16 +1419,20 @@
   }
 
 #if ART_USE_PORTABLE_COMPILER  // We currently only generate symbols on Portable
-  timings.NewSplit("dex2oat ElfStripper");
-  // Strip unneeded sections for target
-  off_t seek_actual = lseek(oat_file->Fd(), 0, SEEK_SET);
-  CHECK_EQ(0, seek_actual);
-  std::string error_msg;
-  CHECK(ElfStripper::Strip(oat_file.get(), &error_msg)) << error_msg;
+  if (!compiler_options.GetIncludeDebugSymbols()) {
+    timings.NewSplit("dex2oat ElfStripper");
+    // Strip unneeded sections for target
+    off_t seek_actual = lseek(oat_file->Fd(), 0, SEEK_SET);
+    CHECK_EQ(0, seek_actual);
+    std::string error_msg;
+    CHECK(ElfStripper::Strip(oat_file.get(), &error_msg)) << error_msg;
 
 
-  // We wrote the oat file successfully, and want to keep it.
-  VLOG(compiler) << "Oat file written successfully (stripped): " << oat_location;
+    // We wrote the oat file successfully, and want to keep it.
+    VLOG(compiler) << "Oat file written successfully (stripped): " << oat_location;
+  } else {
+    VLOG(compiler) << "Oat file written successfully without stripping: " << oat_location;
+  }
 #endif  // ART_USE_PORTABLE_COMPILER
 
   timings.EndSplit();
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index b4b194d..feacbde 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -98,12 +98,10 @@
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
   $(eval $(call build-libart-disassembler,target,debug))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart-disassembler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart-disassembler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart-disassembler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart-disassembler,host,debug))
 endif
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 456e3b5..e6a6860 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -258,6 +258,17 @@
     reg_in_opcode = true;
     target_specific = true;
     break;
+  case 0x63:
+    if (rex == 0x48) {
+      opcode << "movsxd";
+      has_modrm = true;
+      load = true;
+    } else {
+      // In 32-bit mode (!supports_rex_) this is ARPL, with no REX prefix the functionality is the
+      // same as 'mov' but the use of the instruction is discouraged.
+      opcode << StringPrintf("unknown opcode '%02X'", *instr);
+    }
+    break;
   case 0x68: opcode << "push"; immediate_bytes = 4; break;
   case 0x69: opcode << "imul"; load = true; has_modrm = true; immediate_bytes = 4; break;
   case 0x6A: opcode << "push"; immediate_bytes = 1; break;
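
With a bare REX.W prefix (0x48), opcode 0x63 is the 64-bit sign-extending move; note the guard above matches only exactly 0x48, so encodings whose REX also carries R/X/B bits still fall into the unknown-opcode path. A hand-decoded example for exposition:

    // Bytes: 48 63 c8
    //   REX.W (0x48) + 0x63 + ModRM 0xC8 (mod = 11, reg = RCX, rm = RAX)
    //   -> movsxd %eax, %rcx: sign-extend 32-bit EAX into 64-bit RCX.
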
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index 7cee00e..ecf6a0b 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -28,11 +28,9 @@
   $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils libartd-disassembler,art/disassembler,target,debug))
 endif
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libart-disassembler,art/disassembler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_HOST_DEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libartd-disassembler,art/disassembler,host,debug))
-  endif
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libart-disassembler,art/disassembler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libartd-disassembler,art/disassembler,host,debug))
 endif
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 183f667..12970fc 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -209,7 +209,6 @@
   }
 
   const void* GetQuickOatCode(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    MethodHelper mh(m);
     for (size_t i = 0; i < oat_dex_files_.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
@@ -220,7 +219,7 @@
             << "': " << error_msg;
       } else {
         const DexFile::ClassDef* class_def =
-            dex_file->FindClassDef(mh.GetDeclaringClassDescriptor());
+            dex_file->FindClassDef(m->GetDeclaringClassDescriptor());
         if (class_def != NULL) {
           uint16_t class_def_index = dex_file->GetIndexForClassDef(*class_def);
           const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
@@ -919,10 +918,11 @@
 
   static void PrintField(std::ostream& os, mirror::ArtField* field, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    FieldHelper fh(field);
-    const char* descriptor = fh.GetTypeDescriptor();
-    os << StringPrintf("%s: ", fh.GetName());
+    const char* descriptor = field->GetTypeDescriptor();
+    os << StringPrintf("%s: ", field->GetName());
     if (descriptor[0] != 'L' && descriptor[0] != '[') {
+      StackHandleScope<1> hs(Thread::Current());
+      FieldHelper fh(hs.NewHandle(field));
       mirror::Class* type = fh.GetType();
       if (type->IsPrimitiveLong()) {
         os << StringPrintf("%" PRId64 " (0x%" PRIx64 ")\n", field->Get64(obj), field->Get64(obj));
@@ -942,6 +942,8 @@
         os << StringPrintf("null   %s\n", PrettyDescriptor(descriptor).c_str());
       } else {
         // Grab the field type without causing resolution.
+        StackHandleScope<1> hs(Thread::Current());
+        FieldHelper fh(hs.NewHandle(field));
         mirror::Class* field_type = fh.GetType(false);
         if (field_type != NULL) {
           PrettyObjectValue(os, field_type, value);
@@ -1091,11 +1093,11 @@
         }
       } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
           method->IsResolutionMethod() || method->IsImtConflictMethod() ||
-          MethodHelper(method).IsClassInitializer()) {
+          method->IsClassInitializer()) {
         DCHECK(method->GetNativeGcMap() == NULL) << PrettyMethod(method);
         DCHECK(method->GetMappingTable() == NULL) << PrettyMethod(method);
       } else {
-        const DexFile::CodeItem* code_item = MethodHelper(method).GetCodeItem();
+        const DexFile::CodeItem* code_item = method->GetCodeItem();
         size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
         state->stats_.dex_instruction_bytes += dex_instruction_bytes;
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7a832c1..c40ae7a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -26,6 +26,7 @@
 	base/hex_dump.cc \
 	base/logging.cc \
 	base/mutex.cc \
+	base/scoped_flock.cc \
 	base/stringpiece.cc \
 	base/stringprintf.cc \
 	base/timing_logger.cc \
@@ -157,6 +158,7 @@
 
 LIBART_COMMON_SRC_FILES += \
 	arch/context.cc \
+	arch/memcmp16.cc \
 	arch/arm/registers_arm.cc \
 	arch/arm64/registers_arm64.cc \
 	arch/x86/registers_x86.cc \
@@ -194,7 +196,8 @@
 LIBART_GCC_ONLY_SRC_FILES := \
 	interpreter/interpreter_goto_table_impl.cc
 
-LIBART_LDFLAGS := -Wl,--no-fatal-warnings
+LIBART_TARGET_LDFLAGS := -Wl,--no-fatal-warnings
+LIBART_HOST_LDFLAGS :=
 
 LIBART_TARGET_SRC_FILES := \
 	$(LIBART_COMMON_SRC_FILES) \
@@ -208,6 +211,7 @@
 	arch/arm/context_arm.cc.arm \
 	arch/arm/entrypoints_init_arm.cc \
 	arch/arm/jni_entrypoints_arm.S \
+	arch/arm/memcmp16_arm.S \
 	arch/arm/portable_entrypoints_arm.S \
 	arch/arm/quick_entrypoints_arm.S \
 	arch/arm/arm_sdiv.S \
@@ -253,6 +257,7 @@
 	arch/mips/context_mips.cc \
 	arch/mips/entrypoints_init_mips.cc \
 	arch/mips/jni_entrypoints_mips.S \
+	arch/mips/memcmp16_mips.S \
 	arch/mips/portable_entrypoints_mips.S \
 	arch/mips/quick_entrypoints_mips.S \
 	arch/mips/thread_mips.cc \
@@ -364,6 +369,11 @@
 
   LOCAL_CFLAGS := $(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $(LIBART_LDFLAGS)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_LDFLAGS += $(LIBART_TARGET_LDFLAGS)
+  else
+    LOCAL_LDFLAGS += $(LIBART_HOST_LDFLAGS)
+  endif
   $(foreach arch,$(ART_SUPPORTED_ARCH),
     LOCAL_LDFLAGS_$(arch) := $$(LIBART_TARGET_LDFLAGS_$(arch)))
 
@@ -438,13 +448,11 @@
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart,host,debug))
-  endif
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart,host,debug))
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index c4f68af..e1b0ce7 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
-#define ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
+#ifndef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
 
 #include "asm_support_arm.h"
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 340a83e..ebceb63 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -102,7 +102,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -213,7 +212,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm/memcmp16_arm.S b/runtime/arch/arm/memcmp16_arm.S
new file mode 100644
index 0000000..3762194
--- /dev/null
+++ b/runtime/arch/arm/memcmp16_arm.S
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+
+#include "asm_support_arm.S"
+
+/*
+ * Optimized memcmp16() for ARM9.
+ * This would not be optimal on XScale or ARM11, where more prefetching
+ * and use of pld will be needed.
+ * The two major optimizations here are:
+ * (1) The main loop compares 16 bytes at a time
+ * (2) The loads are scheduled so that they won't stall
+ */
+
+ARM_ENTRY __memcmp16
+        pld         [r0, #0]
+        pld         [r1, #0]
+
+        /* take care of the case where the length is zero or the buffers are the same */
+        cmp         r0, r1
+        cmpne       r2, #0
+        moveq       r0, #0
+        bxeq        lr
+
+        /* since r0 holds the result, move the first source
+         * pointer somewhere else
+         */
+
+        mov         r3, r0
+
+         /* make sure we have at least 12 words, this simplifies things below
+          * and avoids some overhead for small blocks
+          */
+
+        cmp         r2, #12
+        bpl         0f
+
+        /* small blocks (less than 12 words) */
+        pld         [r0, #32]
+        pld         [r1, #32]
+
+1:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bxne        lr
+        subs        r2, r2, #1
+        bne         1b
+        bx          lr
+
+
+        /* save registers */
+0:      stmfd       sp!, {r4, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r4, 0
+        .cfi_rel_offset lr, 4
+
+        /* align first pointer to word boundary */
+        tst         r3, #2
+        beq         0f
+
+        ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        sub         r2, r2, #1
+        subs        r0, r0, ip
+        /* restore registers and return */
+        ldmnefd     sp!, {r4, lr}
+        bxne        lr
+
+
+0:      /* here the first pointer is aligned, and we have at least 3 words
+         * to process.
+         */
+
+        /* see if the pointers are congruent */
+        eor         r0, r3, r1
+        ands        r0, r0, #2
+        bne         5f
+
+        /* congruent case, 16 half-words per iteration
+         * We need to make sure there are at least 16+2 words left
+         * because we effectively read ahead one long word, and we could
+         * read past the buffer (and segfault) if we're not careful.
+         */
+
+        ldr         ip, [r1]
+        subs        r2, r2, #(16 + 2)
+        bmi         1f
+
+0:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        ldr         r0, [r3], #4
+        ldr         lr, [r1, #4]!
+        eors        r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        bne         2f
+        subs        r2, r2, #16
+        bhs         0b
+
+        /* do we have at least 2 words left? */
+1:      adds        r2, r2, #(16 - 2 + 2)
+        bmi         4f
+
+        /* finish off 2 words at a time */
+3:      ldr         r0, [r3], #4
+        ldr         ip, [r1], #4
+        eors        r0, r0, ip
+        bne         2f
+        subs        r2, r2, #2
+        bhs         3b
+
+        /* are we done? */
+4:      adds        r2, r2, #2
+        bne         8f
+        /* restore registers and return */
+        mov         r0, #0
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+2:      /* the last 2 words are different, restart them */
+        ldrh        r0, [r3, #-4]
+        ldrh        ip, [r1, #-4]
+        subs        r0, r0, ip
+        ldreqh      r0, [r3, #-2]
+        ldreqh      ip, [r1, #-2]
+        subeqs      r0, r0, ip
+        /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+        /* process the last few words */
+8:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bne         9f
+        subs        r2, r2, #1
+        bne         8b
+
+9:      /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+
+5:      /*************** non-congruent case ***************/
+
+        /* align the unaligned pointer */
+        bic         r1, r1, #3
+        ldr         lr, [r1], #4
+        sub         r2, r2, #8
+
+6:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        mov         ip, lr, lsr #16
+        ldr         lr, [r1], #4
+        ldr         r0, [r3], #4
+        orr         ip, ip, lr, lsl #16
+        eors        r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        bne         7f
+        subs        r2, r2, #8
+        bhs         6b
+        sub         r1, r1, #2
+        /* are we done? */
+        adds        r2, r2, #8
+        moveq       r0, #0
+        beq         9b
+        /* finish off the remaining bytes */
+        b           8b
+
+7:      /* fix up the 2 pointers and fallthrough... */
+        sub         r1, r1, #2
+        b           2b
+END __memcmp16
+
+
+#endif  // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
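
For reference, a hedged C++ statement of the contract the assembly above implements; the signature follows the old entrypoint declaration (int32_t __memcmp16(void*, void*, int32_t)), with the count in 16-bit units:

    #include <cstdint>
    #include <cstddef>

    // Returns the difference of the first mismatching halfwords, or 0 if
    // the buffers are equal, the count is zero, or the pointers are equal.
    int32_t Memcmp16Reference(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }
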
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 8d08190..83cacac 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -20,6 +20,7 @@
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm.h"
 #include "runtime.h"  // for Runtime::CalleeSaveType.
+#include "utils.h"
 
 namespace art {
 namespace arm {
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 46e819e..84ee778 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -85,7 +85,6 @@
 extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -199,7 +198,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 69f5957..d704788 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -196,6 +196,11 @@
     .cfi_adjust_cfa_offset -176
 .endm
 
+.macro POP_REF_ONLY_CALLEE_SAVE_FRAME
+    add sp, sp, #176
+    .cfi_adjust_cfa_offset -176
+.endm
+
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     ret
@@ -479,7 +484,7 @@
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
-    ldr    x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
+    ldr    w2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
     mov    x3, xSELF                      // pass Thread::Current
     mov    x4, sp
     bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
@@ -600,12 +605,12 @@
     str x0, [x4]
 
 .Lexit_art_quick_invoke_stub\@:
-    ldp x2, x19, [x29, #32]   // Restore stack pointer and x19.
+    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
     .cfi_restore x19
     mov sp, x2
     .cfi_restore sp
 
-    ldp x29, x30, [x29]    // Restore old frame pointer and link register.
+    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
     .cfi_restore x29
     .cfi_restore x30
 
@@ -1283,8 +1288,8 @@
      */
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
-UNIMPLEMENTED art_quick_initialize_type
-UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1577,10 +1582,170 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
-UNIMPLEMENTED art_quick_instrumentation_entry
-UNIMPLEMENTED art_quick_instrumentation_exit
-UNIMPLEMENTED art_quick_deoptimize
-UNIMPLEMENTED art_quick_indexof
+
+//
+// Instrumentation-related stubs
+//
+    .extern artInstrumentationMethodEntryFromCode
+ENTRY art_quick_instrumentation_entry
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    mov   x19, x0             // Preserve method reference in a callee-save.
+
+    mov   x2, xSELF
+    mov   x3, sp
+    mov   x4, xLR
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP, LR)
+
+    mov   x9, x0              // x0 = result of call.
+    mov   x0, x19             // Reload method reference.
+
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    adr   xLR, art_quick_instrumentation_exit
+    br    x9                 // Tail-call method with lr set to art_quick_instrumentation_exit.
+END art_quick_instrumentation_entry
+
+    .extern artInstrumentationMethodExitFromCode
+ENTRY art_quick_instrumentation_exit
+    mov   xLR, #0             // Clobber LR for later checks.
+
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
+    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
+    // easier to have an extra small stack area.
+
+    str x0, [sp, #-16]!       // Save integer result.
+    .cfi_adjust_cfa_offset 16
+    str d0,  [sp, #8]         // Save floating-point result.
+
+    add   x1, sp, #16         // Pass SP.
+    mov   x2, x0              // Pass integer result.
+    fmov  x3, d0              // Pass floating-point result.
+    mov   x0, xSELF           // Pass Thread last, now that x0 has been copied into x2.
+    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
+
+    mov   x9, x0              // Return address from instrumentation call.
+    mov   xLR, x1             // x1 holds the link register if we're to bounce to deoptimize.
+
+    ldr   d0, [sp, #8]        // Restore floating-point result.
+    ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
+    .cfi_adjust_cfa_offset -16
+
+    POP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    br    x9                  // Tail-call out.
+END art_quick_instrumentation_exit
+
+    /*
+     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+    .extern artDeoptimize
+ENTRY art_quick_deoptimize
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov    x0, xSELF          // Pass thread.
+    mov    x1, sp             // Pass SP.
+    bl     artDeoptimize      // artDeoptimize(Thread*, SP)
+END art_quick_deoptimize
+
+
+    /*
+     * String's indexOf.
+     *
+     * TODO: Not very optimized.
+     * On entry:
+     *    x0:   string object (known non-null)
+     *    w1:   char to match (known <= 0xFFFF)
+     *    w2:   Starting offset in string data
+     */
+ENTRY art_quick_indexof
+    ldr   w3, [x0, #STRING_COUNT_OFFSET]
+    ldr   w4, [x0, #STRING_OFFSET_OFFSET]
+    ldr   w0, [x0, #STRING_VALUE_OFFSET]  // Load value (char array); the 32-bit ref zero-extends into x0.
+
+    /* Clamp start to [0..count] */
+    cmp   w2, #0
+    csel  w2, wzr, w2, lt
+    cmp   w2, w3
+    csel  w2, w3, w2, gt
+
+    /* Build a pointer to the start of the string data */
+    add   x0, x0, #STRING_DATA_OFFSET
+    add   x0, x0, x4, lsl #1
+
+    /* Save a copy to compute result */
+    mov   x5, x0
+
+    /* Build pointer to start of data to compare and pre-bias */
+    add   x0, x0, x2, lsl #1
+    sub   x0, x0, #2
+
+    /* Compute iteration count */
+    sub   w2, w3, w2
+
+    /*
+     * At this point we have:
+     *  x0: start of the data to test
+     *  w1: char to compare
+     *  w2: iteration count
+     *  x5: original start of string data
+     */
+
+    subs  w2, w2, #4
+    b.lt  .Lindexof_remainder
+
+.Lindexof_loop4:
+    ldrh  w6, [x0, #2]!
+    ldrh  w7, [x0, #2]!
+    ldrh  w8, [x0, #2]!
+    ldrh  w9, [x0, #2]!
+    cmp   w6, w1
+    b.eq  .Lmatch_0
+    cmp   w7, w1
+    b.eq  .Lmatch_1
+    cmp   w8, w1
+    b.eq  .Lmatch_2
+    cmp   w9, w1
+    b.eq  .Lmatch_3
+    subs  w2, w2, #4
+    b.ge  .Lindexof_loop4
+
+.Lindexof_remainder:
+    adds  w2, w2, #4
+    b.eq  .Lindexof_nomatch
+
+.Lindexof_loop1:
+    ldrh  w6, [x0, #2]!
+    cmp   w6, w1
+    b.eq  .Lmatch_3
+    subs  w2, w2, #1
+    b.ne  .Lindexof_loop1
+
+.Lindexof_nomatch:
+    mov   x0, #-1
+    ret
+
+.Lmatch_0:
+    sub   x0, x0, #6
+    sub   x0, x0, x5
+    asr   x0, x0, #1
+    ret
+.Lmatch_1:
+    sub   x0, x0, #4
+    sub   x0, x0, x5
+    asr   x0, x0, #1
+    ret
+.Lmatch_2:
+    sub   x0, x0, #2
+    sub   x0, x0, x5
+    asr   x0, x0, #1
+    ret
+.Lmatch_3:
+    sub   x0, x0, x5
+    asr   x0, x0, #1
+    ret
+END art_quick_indexof
 
    /*
      * String's compareTo.
@@ -1592,7 +1757,7 @@
      *    x1:   comp object pointer
      *
      */
-    .extern __memcmp16
+    .extern memcmp16_generic_static
 ENTRY art_quick_string_compareto
     mov    x2, x0         // x0 is return, use x2 for first input.
     sub    x0, x2, x1     // Same string object?
@@ -1628,6 +1793,7 @@
     add x2, x2, #STRING_DATA_OFFSET
     add x1, x1, #STRING_DATA_OFFSET
 
+    // TODO: Tune this value.
     // Check for long string, do memcmp16 for them.
     cmp w3, #28  // Constant from arm32.
     bgt .Ldo_memcmp16
@@ -1688,7 +1854,7 @@
 
     mov x0, x2
     uxtw x2, w3
-    bl __memcmp16
+    bl memcmp16_generic_static
 
     ldr x1, [sp], #16            // Restore old x0 = length diff
 
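
For readers following the arm64 indexof stub above: the pre-biased data pointer and the per-label corrections (.Lmatch_0 subtracts 6 bytes, .Lmatch_1 four, and so on, before shifting right by one) compute an ordinary character index. A minimal C++ sketch of the same clamping and search logic, with the STRING_* field loads replaced by plain parameters (illustrative only, not ART's implementation):

    #include <cstdint>

    // 'chars' and 'count' stand in for the String's value array and length fields.
    int32_t IndexOfSketch(const uint16_t* chars, int32_t count, uint16_t ch, int32_t start) {
      // Clamp start to [0..count], as the two cmp/csel pairs do.
      if (start < 0) start = 0;
      if (start > count) start = count;
      for (int32_t i = start; i < count; ++i) {
        if (chars[i] == ch) {
          return i;  // .Lmatch_*: (current position - data start) / 2.
        }
      }
      return -1;  // .Lindexof_nomatch.
    }
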
diff --git a/runtime/arch/memcmp16.cc b/runtime/arch/memcmp16.cc
new file mode 100644
index 0000000..7928085
--- /dev/null
+++ b/runtime/arch/memcmp16.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memcmp16.h"
+
+// This is linked against by assembly stubs only.
+#pragma GCC diagnostic ignored "-Wunused-function"
+
+int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+#pragma GCC diagnostic warning "-Wunused-function"
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
new file mode 100644
index 0000000..ad58588
--- /dev/null
+++ b/runtime/arch/memcmp16.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MEMCMP16_H_
+#define ART_RUNTIME_ARCH_MEMCMP16_H_
+
+#include <cstddef>
+#include <cstdint>
+
+// memcmp16 support.
+//
+// This can either be optimized assembly code, in which case we expect a function __memcmp16,
+// or generic C support.
+//
+// In case of the generic support we declare two versions: one in this header file meant to be
+// inlined, and a static version that assembly stubs can link against.
+//
+// In both cases, MemCmp16 is declared.
+
+#if defined(__arm__) || defined(__mips)
+
+extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
+#define MemCmp16 __memcmp16
+
+#else
+
+// This is the generic inlined version.
+static inline int32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+extern "C" int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count);
+#endif
+
+#endif  // ART_RUNTIME_ARCH_MEMCMP16_H_
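
The point of comparing 16-bit units rather than bytes: MemCmp16 returns the difference of the first mismatching code units, which matches String ordering, whereas a byte-wise memcmp on little-endian memory compares the low byte first and can get the sign wrong. A small self-contained illustration (the values are invented for the example):

    #include <cstddef>
    #include <cstdint>

    // Same body as the generic inlined MemCmp16 above, renamed for the sketch.
    static int32_t MemCmp16Sketch(const uint16_t* s0, const uint16_t* s1, std::size_t count) {
      for (std::size_t i = 0; i < count; i++) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }

    int main() {
      const uint16_t a[] = { 'a', 0x0100 };
      const uint16_t b[] = { 'a', 0x00FF };
      // 0x0100 - 0x00FF = 1, so a sorts after b. A byte-wise memcmp on
      // little-endian memory would see 0x00 vs 0xFF first and report a < b.
      return MemCmp16Sketch(a, b, 2) > 0 ? 0 : 1;
    }
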
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 500a2eb..08caa80 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -103,7 +103,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -216,7 +215,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
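
For context on what is being deleted here: pMemcmp16 was one slot in the per-thread QuickEntryPoints function-pointer table that compiled code calls through; after this change the comparison helper is reached directly from the assembly stub instead. The table pattern itself, heavily abridged (a sketch, not the real QuickEntryPoints layout):

    #include <cstddef>
    #include <cstdint>

    // Abridged sketch; the real struct has many more slots and is reached
    // at a fixed offset from the Thread object.
    struct QuickEntryPointsSketch {
      int32_t (*pIndexOf)(void*, uint32_t, uint32_t, uint32_t);
      int32_t (*pStringCompareTo)(void*, void*);
      void* (*pMemcpy)(void*, const void*, std::size_t);
    };
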
diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S
new file mode 100644
index 0000000..0196edc
--- /dev/null
+++ b/runtime/arch/mips/memcmp16_mips.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+#define ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+
+#include "asm_support_mips.S"
+
+// u4 __memcmp16(const u2*, const u2*, size_t);
+ENTRY __memcmp16
+  li  $t0,0
+  li  $t1,0
+  beqz  $a2,done   /* 0 length string */
+  beq $a0,$a1,done    /* strings are identical */
+
+  /* Unoptimised... */
+1:  lhu $t0,0($a0)
+  lhu $t1,0($a1)
+  addu  $a1,2
+  bne $t0,$t1,done
+  addu  $a0,2
+  subu  $a2,1
+  bnez  $a2,1b
+
+done:
+  subu  $v0,$t0,$t1
+  j $ra
+END __memcmp16
+
+#endif  // ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0b7f268c..7785bc3 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -77,14 +77,15 @@
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "pushl %[referrer]\n\t"     // Store referrer
+        "subl $12, %%esp\n\t"       // Align stack.
+        "pushl %[referrer]\n\t"     // Store referrer.
         "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"            // Pop referrer
+        "addl $16, %%esp"           // Pop referrer
         : "=a" (result)
           // Use the result from eax
-          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
-            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-            : );  // clobber.
+        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+        : "memory");  // clobber.
     // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
@@ -122,7 +123,7 @@
           // Use the result from r0
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer)
-        : );  // clobber.
+        : "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -255,7 +256,8 @@
           "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
           "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+          "memory");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -268,9 +270,10 @@
         ".cfi_adjust_cfa_offset -16\n\t"
         : "=a" (result)
           // Use the result from rax
-          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
-            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+          "memory");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -298,14 +301,15 @@
     // TODO: Set the thread?
     __asm__ __volatile__(
         "movd %[hidden], %%xmm0\n\t"
+        "subl $12, %%esp\n\t"       // Align stack.
         "pushl %[referrer]\n\t"     // Store referrer
         "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"            // Pop referrer
+        "addl $16, %%esp"           // Pop referrer
         : "=a" (result)
           // Use the result from eax
-          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
-            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-            : );  // clobber.
+        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
+          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+        : "memory");  // clobber.
     // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
@@ -343,9 +347,9 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-          : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-            [referrer] "r"(referrer), [hidden] "r"(hidden)
-            : );  // clobber.
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -477,7 +481,8 @@
           "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
           "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+          "memory");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -494,7 +499,8 @@
         // Use the result from rax
         : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden)
         // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+          "memory");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -1567,8 +1573,7 @@
       StackHandleScope<1> hs(self);
       Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
 
-      FieldHelper fh(f.Get());
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = f->GetTypeAsPrimitiveType();
       switch (type) {
         case Primitive::Type::kPrimInt:
           if (test_type == type) {
@@ -1584,7 +1589,7 @@
 
         case Primitive::Type::kPrimNot:
           // Don't try array.
-          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
             GetSetObjStatic(&obj, &f, self, m.Get(), test);
           }
           break;
@@ -1603,8 +1608,7 @@
       StackHandleScope<1> hs(self);
       Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
 
-      FieldHelper fh(f.Get());
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = f->GetTypeAsPrimitiveType();
       switch (type) {
         case Primitive::Type::kPrimInt:
           if (test_type == type) {
@@ -1620,7 +1624,7 @@
 
         case Primitive::Type::kPrimNot:
           // Don't try array.
-          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
             GetSetObjInstance(&obj, &f, self, m.Get(), test);
           }
           break;
@@ -1780,9 +1784,87 @@
   ASSERT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
 #else
-  LOG(INFO) << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA;
+  LOG(INFO) << "Skipping imt as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
-  std::cout << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA << std::endl;
+  std::cout << "Skipping imt as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+extern "C" void art_quick_indexof(void);
+#endif
+
+TEST_F(StubTest, StringIndexOf) {
+#if defined(__arm__) || defined(__aarch64__)
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  // Create some strings
+  // Use array so we can index into it and use a matrix for expected results
+  // Setup: The first half is standard. The second half uses a non-zero offset.
+  // TODO: Shared backing arrays.
+  static constexpr size_t kStringCount = 7;
+  const char* c_str[kStringCount] = { "", "a", "ba", "cba", "dcba", "edcba", "asdfghjkl" };
+  static constexpr size_t kCharCount = 5;
+  const char c_char[kCharCount] = { 'a', 'b', 'c', 'd', 'e' };
+
+  StackHandleScope<kStringCount> hs(self);
+  Handle<mirror::String> s[kStringCount];
+
+  for (size_t i = 0; i < kStringCount; ++i) {
+    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c_str[i]));
+  }
+
+  // Matrix of expectations. First component is the first parameter. As we test a range of start
+  // offsets, we compute the expected values here and rely on String::FastIndexOf being correct.
+  static constexpr size_t kMaxLen = 9;
+  DCHECK_LE(strlen(c_str[kStringCount-1]), kMaxLen) << "Please fix the indexof test.";
+
+  // Last dimension: start, offset by 1.
+  int32_t expected[kStringCount][kCharCount][kMaxLen + 3];
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kCharCount; ++y) {
+      for (size_t z = 0; z <= kMaxLen + 2; ++z) {
+        expected[x][y][z] = s[x]->FastIndexOf(c_char[y], static_cast<int32_t>(z) - 1);
+      }
+    }
+  }
+
+  // Play with it...
+
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kCharCount; ++y) {
+      for (size_t z = 0; z <= kMaxLen + 2; ++z) {
+        int32_t start = static_cast<int32_t>(z) - 1;
+
+        // Test indexof for string x, char y, starting at the given offset.
+        size_t result = Invoke3(reinterpret_cast<size_t>(s[x].Get()), c_char[y], start,
+                                reinterpret_cast<uintptr_t>(&art_quick_indexof), self);
+
+        EXPECT_FALSE(self->IsExceptionPending());
+
+        // The result is a 32b signed integer
+        union {
+          size_t r;
+          int32_t i;
+        } conv;
+        conv.r = result;
+
+        EXPECT_EQ(expected[x][y][z], conv.i) << "Wrong result for " << c_str[x] << " / " <<
+            c_char[y] << " @ " << start;
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping indexof as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping indexof as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
 
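
Why "memory" was added to every clobber list above: the stubs called from these asm blocks read and write memory the compiler cannot see through the operands, so without the clobber the optimizer may keep stale values in registers across the call or sink stores past it. A minimal demonstration of the effect (not ART code):

    int shared = 1;

    int ReadAroundBarrier() {
      int a = shared;
      // The "memory" clobber tells the compiler that memory may have changed:
      // it must not cache 'shared' in a register across this statement.
      __asm__ __volatile__("" : : : "memory");
      int b = shared;  // Reloaded from memory, not reused from 'a'.
      return a + b;
    }
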
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index f1d0746..ae39be1 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -28,6 +28,7 @@
     #define END_MACRO .endmacro
 
     // Clang's as(1) uses $0, $1, and so on for macro arguments.
+    #define RAW_VAR(name,index) $index
     #define VAR(name,index) SYMBOL($index)
     #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
@@ -50,6 +51,7 @@
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
     // special character meaning care needs to be taken when passing registers as macro arguments.
     .altmacro
+    #define RAW_VAR(name,index) name&
     #define VAR(name,index) name&
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
@@ -94,7 +96,7 @@
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
     #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-        // TODO: Disabled for old clang 3.3, this leads to text reolocations and there should be a
+        // TODO: Disabled for old clang 3.3, this leads to text relocations and there should be a
         // better fix.
         #define PLT_SYMBOL(name) name // ## @PLT
     #else
@@ -151,8 +153,10 @@
 END_MACRO
 
 MACRO0(SETUP_GOT_NOSAVE)
+#ifndef __APPLE__
     call __x86.get_pc_thunk.bx
     addl $_GLOBAL_OFFSET_TABLE_, %ebx
+#endif
 END_MACRO
 
 MACRO0(SETUP_GOT)
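
The reason for the new RAW_VAR, relevant to the quick_entrypoints_x86.S changes below: DEFINE_FUNCTION and END_FUNCTION apply SYMBOL to their argument themselves, so passing a name through VAR (which also applies SYMBOL) would mangle it twice on targets where SYMBOL adds a prefix. A simplified preprocessor model (the real macros also handle clang's positional $-arguments and altmacro mode):

    // Hypothetical target where SYMBOL prepends an underscore.
    #define SYMBOL(name) _ ## name
    #define VAR(name) SYMBOL(name)  // Use where SYMBOL has not been applied yet.
    #define RAW_VAR(name) name      // Use inside macros that apply SYMBOL themselves.

    // With DEFINE_FUNCTION applying SYMBOL internally:
    //   DEFINE_FUNCTION VAR(art_quick_foo)     -> __art_quick_foo  (double prefix, wrong)
    //   DEFINE_FUNCTION RAW_VAR(art_quick_foo) -> _art_quick_foo   (intended)
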
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c53fa1e..c30dca1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -81,7 +81,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -194,7 +193,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = nullptr;  // Not needed on x86
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 07268ea..28e4dd6 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -111,7 +111,7 @@
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
@@ -123,11 +123,11 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
@@ -139,11 +139,11 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %edx
     // Outgoing argument set up
@@ -155,7 +155,7 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
     /*
@@ -207,7 +207,7 @@
      * pointing back to the original caller.
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     // Set up the callee save frame to conform with Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
     // return address
     PUSH edi
@@ -248,7 +248,7 @@
     addl MACRO_LITERAL(4), %esp   // Pop code pointer off stack
     CFI_ADJUST_CFA_OFFSET(-4)
     DELIVER_PENDING_EXCEPTION
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
@@ -315,7 +315,7 @@
 END_FUNCTION art_quick_invoke_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
@@ -330,11 +330,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers EBX
@@ -349,11 +349,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers EBX
@@ -368,11 +368,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %ebx                // remember SP
     // Outgoing argument set up
@@ -390,7 +390,7 @@
     CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
@@ -653,17 +653,17 @@
      */
 DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
     testl %eax, %eax
-    jnz art_quick_aput_obj_with_bound_check
-    jmp art_quick_throw_null_pointer_exception
+    jnz SYMBOL(art_quick_aput_obj_with_bound_check)
+    jmp SYMBOL(art_quick_throw_null_pointer_exception)
 END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
 
 DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
     movl ARRAY_LENGTH_OFFSET(%eax), %ebx
     cmpl %ebx, %ecx
-    jb art_quick_aput_obj
+    jb SYMBOL(art_quick_aput_obj)
     mov %ecx, %eax
     mov %ebx, %ecx
-    jmp art_quick_throw_array_bounds
+    jmp SYMBOL(art_quick_throw_array_bounds)
 END_FUNCTION art_quick_aput_obj_with_bound_check
 
 DEFINE_FUNCTION art_quick_aput_obj
@@ -1122,7 +1122,7 @@
     movd %xmm0, %ecx              // get target method index stored in xmm0
     movl OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax  // load the target method
     POP ecx
-    jmp art_quick_invoke_interface_trampoline
+    jmp SYMBOL(art_quick_invoke_interface_trampoline)
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
@@ -1152,8 +1152,92 @@
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    int3
-    int3
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // This also stores the native ArtMethod reference at the bottom of the stack.
+
+    movl %esp, %ebp                 // save SP at callee-save frame
+    movl %esp, %edi
+    CFI_DEF_CFA_REGISTER(edi)
+    subl LITERAL(5120), %esp
+    // prepare for artQuickGenericJniTrampoline call
+    // (Thread*,  SP)
+    //  (esp)    4(esp)   <= C calling convention
+    //  fs:...    ebp     <= where they are
+    // Also: PLT, so need GOT in ebx.
+
+    subl LITERAL(8), %esp         // Padding for 16B alignment.
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    SETUP_GOT_NOSAVE              // Clobbers ebx.
+    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
+    // Drop call stack.
+    addl LITERAL(16), %esp
+
+    // At the bottom of the alloca we now have the pointer to the method (= the bottom of the
+    // callee-save frame). Get the adjusted frame pointer.
+    popl %ebp
+
+    // Check for error, negative value.
+    test %eax, %eax
+    js .Lentry_error
+
+    // release part of the alloca, get the code pointer
+    addl %eax, %esp
+    popl %eax
+
+    // On x86 there are no registers passed, so nothing to pop here.
+
+    // Native call.
+    call *%eax
+
+    // Pop the native stack, but keep the space that was reserved for the cookie.
+    movl %ebp, %esp
+    subl LITERAL(16), %esp        // Alignment.
+
+    // result sign extension is handled in C code
+    // prepare for artQuickGenericJniEndTrampoline call
+    // (Thread*,  SP,  result, result_f)
+    //  (esp)   4(esp)  8(esp)  16(esp)    <= C calling convention
+    //  fs:...    ebp  eax:edx   xmm0      <= where they are
+
+    subl LITERAL(8), %esp         // Pass float result.
+    movsd %xmm0, (%esp)
+    pushl %edx                    // Pass int result.
+    pushl %eax
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
+
+    // Tear down the alloca.
+    movl %edi, %esp
+    CFI_DEF_CFA_REGISTER(esp)
+
+    // Pending exceptions possible.
+    mov %fs:THREAD_EXCEPTION_OFFSET, %ebx
+    testl %ebx, %ebx
+    jnz .Lexception_in_native
+
+    // Tear down the callee-save frame.
+    addl LITERAL(4), %esp     // Remove padding
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ecx
+    addl LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ebx
+    POP ebp  // Restore callee saves
+    POP esi
+    POP edi
+    // store into fpr, for when it's a fpr return...
+    movd %eax, %xmm0
+    movd %edx, %xmm1
+    punpckldq %xmm1, %xmm0
+    ret
+.Lentry_error:
+    movl %edi, %esp
+    CFI_DEF_CFA_REGISTER(esp)
+.Lexception_in_native:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
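
The generic JNI trampoline above follows a simple protocol with its C helpers that the assembly obscures: the first helper builds the native call frame inside the 5120-byte alloca and reports back how much of that reservation to release, with a negative value meaning failure; the native code pointer is then popped off the adjusted stack. A toy model of that success/error protocol (all names and signatures here are assumptions for illustration, not ART's declared API):

    #include <cstdint>

    using NativeCode = int (*)();

    static int DummyNative() { return 42; }

    // Stand-in for the first trampoline helper: a negative return means
    // "error, deliver the pending exception" (the 'js .Lentry_error' path).
    static intptr_t PrepareNativeCallSketch(NativeCode* code_out) {
      *code_out = DummyNative;  // The real stub pops this off the native stack.
      return 0;                 // Bytes of the reservation to release.
    }

    int main() {
      NativeCode code;
      if (PrepareNativeCallSketch(&code) < 0) {
        return 1;                   // Error path.
      }
      return code() == 42 ? 0 : 1;  // Mirrors 'call *%eax'.
    }
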
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 9f36927..b97c143 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -156,7 +156,11 @@
 
   // Free LDT entry.
 #if defined(__APPLE__)
-  i386_set_ldt(selector >> 3, 0, 1);
+  // TODO: Release selectors on OS X. Not releasing them is a leak that will exhaust LDT entries
+  // after enough threads are created. However, the following code results in kernel panics on
+  // OS X 10.9.
+  UNUSED(selector);
+  // i386_set_ldt(selector >> 3, 0, 1);
 #else
   user_desc ldt_entry;
   memset(&ldt_entry, 0, sizeof(ldt_entry));
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index aeda072..2612417 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -80,7 +80,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -193,7 +192,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = NULL;  // Not needed on x86.
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0d9d388..c9220c8 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -261,7 +261,7 @@
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
+    movl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %edx  // pass caller Method*
     movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
     movq %rsp, %r8                                         // pass SP
 
@@ -897,7 +897,7 @@
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rsi                 // pass referrer
+    movl 8(%rsp), %esi                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
@@ -910,7 +910,7 @@
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rdx                 // pass referrer
+    movl 8(%rsp), %edx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
@@ -923,7 +923,7 @@
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rcx                 // pass referrer
+    movl 8(%rsp), %ecx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
@@ -953,7 +953,7 @@
 // This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
     movq %rsi, %rdx                    // pass new_val
-    movq 8(%rsp), %rsi                 // pass referrer
+    movl 8(%rsp), %esi                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // field_idx is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
@@ -1002,15 +1002,12 @@
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways).
-     *
-     * xmm0 is a hidden argument that holds the target method's dex method index.
-     * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
+     * Called to resolve an imt conflict.
+     * rax is a hidden argument that holds the target method's dex method index.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
-    movq 16(%rsp), %rdi            // load caller Method*
+    movl 8(%rsp), %edi            // load caller Method*
     movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
-    movd %xmm0, %rax               // get target method index stored in xmm0
     movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
     jmp art_quick_invoke_interface_trampoline_local
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -1296,14 +1293,77 @@
     /*
      * Routine that intercepts method calls and returns.
      */
-UNIMPLEMENTED art_quick_instrumentation_entry
-UNIMPLEMENTED art_quick_instrumentation_exit
+DEFINE_FUNCTION art_quick_instrumentation_entry
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    movq %rdi, %r12               // Preserve method pointer in a callee-save.
+
+    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
+    movq %rsp, %rcx                     // Pass SP.
+    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.
+
+    call PLT_SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
+
+                                  // %rax = result of call.
+    movq %r12, %rdi               // Reload method pointer.
+
+    leaq art_quick_instrumentation_exit_local(%rip), %r12   // Set up return through instrumentation
+    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
+
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    jmp *%rax                     // Tail call to intended method.
+END_FUNCTION art_quick_instrumentation_entry
+
+DEFINE_FUNCTION art_quick_instrumentation_exit
+    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
+
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+
+    // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
+    // we would need to fully restore it. As there are a good number of callee-save registers, it
+    // seems easier to have an extra small stack area. But this should be revisited.
+
+    movq  %rsp, %rsi                          // Pass SP.
+
+    PUSH rax                  // Save integer result.
+    subq LITERAL(8), %rsp     // Save floating-point result.
+    CFI_ADJUST_CFA_OFFSET(8)
+    movd %xmm0, (%rsp)
+
+    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
+    movq  %rax, %rdx                          // Pass integer result.
+    movq  %xmm0, %rcx                         // Pass floating-point result.
+
+    call PLT_SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
+
+    movq  %rax, %rdi          // Store return PC
+    movq  %rdx, %rsi          // Store second return PC in hidden arg.
+
+    movd (%rsp), %xmm0        // Restore floating-point result.
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    POP rax                   // Restore integer result.
+
+    addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp   // Drop save frame and fake return pc.
+
+    jmp   *%rdi               // Return.
+END_FUNCTION art_quick_instrumentation_exit
 
     /*
      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
      * will long jump to the upcall with a special exception of -1.
      */
-UNIMPLEMENTED art_quick_deoptimize
+DEFINE_FUNCTION art_quick_deoptimize
+    pushq %rsi                     // Fake that we were called. Use hidden arg.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+                                   // Stack should be aligned now.
+    movq %rsp, %rsi                           // Pass SP.
+    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
+    call PLT_SYMBOL(artDeoptimize) // artDeoptimize(Thread*, SP)
+    int3                           // Unreachable.
+END_FUNCTION art_quick_deoptimize
+
 
     /*
      * String's compareTo.
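
The x86-64 instrumentation stubs above rely on the same return-address hijack as the arm64 versions: on entry, the saved return PC is swapped for art_quick_instrumentation_exit, so when the instrumented method returns it lands in the exit stub, which asks the runtime for the real destination. A toy C++ model of the redirection (stack slots reduced to a single variable; purely illustrative):

    #include <cstdio>

    using ReturnTarget = void (*)();

    void RealCaller() { std::puts("back in the real caller"); }

    ReturnTarget stashed_return = nullptr;

    void InstrumentationExitSketch() {
      std::puts("method-exit event");
      stashed_return();  // The runtime supplies the original return target.
    }

    void InstrumentationEntrySketch(ReturnTarget* return_slot) {
      std::puts("method-entry event");
      stashed_return = *return_slot;             // Remember the real return PC.
      *return_slot = InstrumentationExitSketch;  // Method now "returns" into the exit stub.
    }

    int main() {
      ReturnTarget slot = RealCaller;
      InstrumentationEntrySketch(&slot);
      slot();  // Simulates the instrumented method returning.
      return 0;
    }
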
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 9262db6..ed83a33 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -35,161 +35,14 @@
 
 class Mutex;
 
-#if ART_HAVE_STDATOMIC
-template<typename T>
-class Atomic : public std::atomic<T> {
- public:
-  COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>),
-                 std_atomic_size_differs_from_that_of_underlying_type);
-  COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>),
-                 std_atomic_alignment_differs_from_that_of_underlying_type);
-
-  Atomic<T>() : std::atomic<T>() { }
-
-  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
-
-  // Load from memory without ordering or synchronization constraints.
-  T LoadRelaxed() const {
-    return this->load(std::memory_order_relaxed);
-  }
-
-  // Load from memory with a total ordering.
-  T LoadSequentiallyConsistent() const {
-    return this->load(std::memory_order_seq_cst);
-  }
-
-  // Store to memory without ordering or synchronization constraints.
-  void StoreRelaxed(T desired) {
-    this->store(desired, std::memory_order_relaxed);
-  }
-
-  // Store to memory with a total ordering.
-  void StoreSequentiallyConsistent(T desired) {
-    this->store(desired, std::memory_order_seq_cst);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Doesn't
-  // imply ordering or synchronization constraints.
-  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Prior writes
-  // made to other memory locations by the thread that did the release become visible in this
-  // thread.
-  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. prior writes
-  // to other memory locations become visible to the threads that do a consume or an acquire on the
-  // same location.
-  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
-  }
-
-  T FetchAndAddSequentiallyConsistent(const T value) {
-    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndSubSequentiallyConsistent(const T value) {
-    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
-  }
-
-  volatile T* Address() {
-    return reinterpret_cast<T*>(this);
-  }
-
-  static T MaxValue() {
-    return std::numeric_limits<T>::max();
-  }
-};
-#else
-template<typename T>
-class Atomic {
- public:
-  Atomic<T>() : value_(0) { }
-
-  explicit Atomic<T>(T value) : value_(value) { }
-
-  // Load from memory without ordering or synchronization constraints.
-  T LoadRelaxed() const {
-    return value_;
-  }
-
-  // Load from memory with a total ordering.
-  T LoadSequentiallyConsistent() const;
-
-  // Store to memory without ordering or synchronization constraints.
-  void StoreRelaxed(T desired) {
-    value_ = desired;
-  }
-
-  // Store to memory with a total ordering.
-  void StoreSequentiallyConsistent(T desired);
-
-  // Atomically replace the value with desired value if it matches the expected value. Doesn't
-  // imply ordering or synchronization constraints.
-  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
-    // TODO: make this relaxed.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Prior writes
-  // made to other memory locations by the thread that did the release become visible in this
-  // thread.
-  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
-    // TODO: make this acquire.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. prior writes
-  // to other memory locations become visible to the threads that do a consume or an acquire on the
-  // same location.
-  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
-    // TODO: make this release.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile T* Address() {
-    return &value_;
-  }
-
-  T FetchAndAddSequentiallyConsistent(const T value) {
-    return __sync_fetch_and_add(&value_, value);  // Return old_value.
-  }
-
-  T FetchAndSubSequentiallyConsistent(const T value) {
-    return __sync_fetch_and_sub(&value_, value);  // Return old value.
-  }
-
-  T operator++() {  // Prefix operator.
-    return __sync_add_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  T operator++(int) {  // Postfix operator.
-    return __sync_fetch_and_add(&value_, 1);  // Return old value.
-  }
-
-  T operator--() {  // Prefix operator.
-    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  T operator--(int) {  // Postfix operator.
-    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
-  }
-
-  static T MaxValue() {
-    return std::numeric_limits<T>::max();
-  }
-
- private:
-  T value_;
-};
-#endif
-
-typedef Atomic<int32_t> AtomicInteger;
-
+// QuasiAtomic encapsulates two separate facilities that we are
+// trying to move away from: "quasiatomic" 64-bit operations
+// and custom memory fences. For the time being, they remain
+// exposed. Clients should be converted to use the Atomic class
+// below whenever possible, and should eventually use C++11 atomics.
+// The two facilities that do not have a good C++11 analog are
+// ThreadFenceForConstructor and Atomic::*JavaData.
+//
 // NOTE: Two "quasiatomic" operations on the exact same memory address
 // are guaranteed to operate atomically with respect to each other,
 // but no guarantees are made about quasiatomic operations mixed with
@@ -286,6 +139,11 @@
 
   // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value"
   // and return true. Otherwise, don't swap, and return false.
+  // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
+  // semantics (assuming all other accesses use a mutex if this one does).
+  // This has "strong" semantics; if it fails then it is guaranteed that
+  // at some point during the execution of Cas64, *addr was not equal to
+  // old_value.
   static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
     if (!kNeedSwapMutexes) {
       return __sync_bool_compare_and_swap(addr, old_value, new_value);
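
The strong-CAS guarantee documented here is what lets callers write unconditional retry loops: a failed Cas64 means the value really differed at some point, never a spurious failure, so progress arguments stay simple. A sketch of the canonical pattern on the lock-free path (the mutex fallback is omitted):

    #include <cstdint>

    // Lock-free path only; mirrors the __sync_bool_compare_and_swap branch.
    static bool Cas64Sketch(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
      return __sync_bool_compare_and_swap(addr, old_value, new_value);
    }

    int64_t FetchAndAdd64(volatile int64_t* addr, int64_t delta) {
      int64_t old;
      do {
        old = *addr;  // Racy read; the CAS below validates it.
      } while (!Cas64Sketch(old, old + delta, addr));
      return old;
    }
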
@@ -299,9 +157,37 @@
     return kNeedSwapMutexes;
   }
 
-  static void MembarLoadStore() {
+  #if ART_HAVE_STDATOMIC
+
+  static void ThreadFenceAcquire() {
+    std::atomic_thread_fence(std::memory_order_acquire);
+  }
+
+  static void ThreadFenceRelease() {
+    std::atomic_thread_fence(std::memory_order_release);
+  }
+
+  static void ThreadFenceForConstructor() {
+    #if defined(__aarch64__)
+      __asm__ __volatile__("dmb ishst" : : : "memory");
+    #else
+      std::atomic_thread_fence(std::memory_order_release);
+    #endif
+  }
+
+  static void ThreadFenceSequentiallyConsistent() {
+    std::atomic_thread_fence(std::memory_order_seq_cst);
+  }
+
+  #else
+
+  static void ThreadFenceAcquire() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
+    // Could possibly use dmb ishld on aarch64, but currently we also use
+    // this fence on volatile loads to enforce store atomicity, and ishld
+    // is insufficient for that purpose.
   #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
@@ -311,9 +197,10 @@
   #endif
   }
 
-  static void MembarLoadLoad() {
+  static void ThreadFenceRelease() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
+    // ishst doesn't order load followed by store.
   #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
@@ -323,7 +210,11 @@
   #endif
   }
 
-  static void MembarStoreStore() {
+  // Fence at the end of a constructor with final fields, or after allocation.
+  // We believe this only has to order stores, and can thus be weaker than
+  // release on aarch64.
+  static void ThreadFenceForConstructor() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ishst" : : : "memory");
   #elif defined(__i386__) || defined(__x86_64__)
@@ -335,7 +226,7 @@
   #endif
   }
 
-  static void MembarStoreLoad() {
+  static void ThreadFenceSequentiallyConsistent() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
   #elif defined(__i386__) || defined(__x86_64__)
@@ -346,6 +237,7 @@
   #error Unexpected architecture
   #endif
   }
+  #endif
 
  private:
   static Mutex* GetSwapMutex(const volatile int64_t* addr);
@@ -360,19 +252,352 @@
   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 };
 
+#if ART_HAVE_STDATOMIC
+template<typename T>
+class Atomic : public std::atomic<T> {
+ public:
+  Atomic<T>() : std::atomic<T>() { }
+
+  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Word tearing allowed, but may race.
+  // TODO: Optimize?
+  // There has been some discussion of eventually disallowing word
+  // tearing for Java data loads.
+  T LoadJavaData() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Load from memory with a total ordering.
+  // Corresponds exactly to a Java volatile load.
+  T LoadSequentiallyConsistent() const {
+    return this->load(std::memory_order_seq_cst);
+  }
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Word tearing allowed, but may race.
+  void StoreJavaData(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Store to memory with release ordering.
+  void StoreRelease(T desired) {
+    this->store(desired, std::memory_order_release);
+  }
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired) {
+    this->store(desired, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value.
+  // Participates in total ordering of atomic operations.
+  bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
+  }
+
+  // The same, except it may fail spuriously.
+  bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // The same, except it may fail spuriously.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
+  volatile T* Address() {
+    return reinterpret_cast<T*>(this);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+};
+
+#else
+
+template<typename T> class Atomic;
+
+// Helper class for Atomic to deal separately with size 8 and small
+// objects.  Should not be used directly.
+
+template<int SZ, class T> struct AtomicHelper {
+  friend class Atomic<T>;
+
+ private:
+  COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg);
+
+  static T LoadRelaxed(const volatile T* loc) {
+    // sizeof(T) <= 4
+    return *loc;
+  }
+
+  static void StoreRelaxed(volatile T* loc, T desired) {
+    // sizeof(T) <= 4
+    *loc = desired;
+  }
+
+  static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
+                                                  T expected_value, T desired_value) {
+    // sizeof(T) <= 4
+    return __sync_bool_compare_and_swap(loc, expected_value, desired_value);
+  }
+};
+
+template<class T> struct AtomicHelper<8, T> {
+  friend class Atomic<T>;
+
+ private:
+  COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg);
+
+  static T LoadRelaxed(const volatile T* loc) {
+    // sizeof(T) == 8
+    volatile const int64_t* loc_ptr = reinterpret_cast<volatile const int64_t*>(loc);
+    return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
+  }
+
+  static void StoreRelaxed(volatile T* loc, T desired) {
+    // sizeof(T) == 8
+    volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
+    QuasiAtomic::Write64(loc_ptr, static_cast<int64_t>(desired));
+  }
+
+  static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
+                                                  T expected_value, T desired_value) {
+    // sizeof(T) == 8
+    volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
+    return QuasiAtomic::Cas64(
+                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
+                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
+  }
+};
+
+template<typename T>
+class Atomic {
+ private:
+  COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
+
+ public:
+  Atomic<T>() : value_(0) { }
+
+  explicit Atomic<T>(T value) : value_(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return AtomicHelper<sizeof(T), T>::LoadRelaxed(&value_);
+  }
+
+  // Word tearing allowed, but may race.
+  T LoadJavaData() const {
+    return value_;
+  }
+
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const;
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    AtomicHelper<sizeof(T), T>::StoreRelaxed(&value_, desired);
+  }
+
+  // Word tearing allowed, but may race.
+  void StoreJavaData(T desired) {
+    value_ = desired;
+  }
+
+  // Store to memory with release ordering.
+  void StoreRelease(T desired);
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired);
+
+  // Atomically replace the value with desired value if it matches the expected value.
+  // Participates in total ordering of atomic operations.
+  bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
+    return AtomicHelper<sizeof(T), T>::
+        CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
+  }
+
+  // The same, but may fail spuriously.
+  bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
+    // TODO: Take advantage of the fact that it may fail spuriously.
+    return AtomicHelper<sizeof(T), T>::
+        CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
+    // TODO: make this relaxed.
+    return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // The same, but may fail spuriously.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    // TODO: Take advantage of the fact that it may fail spuriously.
+    // TODO: make this relaxed.
+    return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior accesses
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    // TODO: make this acquire.
+    return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior accesses
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    // TODO: make this release.
+    return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_add(&value_, value);  // Return old value.
+    } else {
+      T expected;
+      do {
+        expected = LoadRelaxed();
+      } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value));
+      return expected;
+    }
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_sub(&value_, value);  // Return old value.
+    } else {
+      return FetchAndAddSequentiallyConsistent(-value);
+    }
+  }
+
+  T operator++() {  // Prefix operator.
+    if (sizeof(T) <= 4) {
+      return __sync_add_and_fetch(&value_, 1);  // Return new value.
+    } else {
+      return FetchAndAddSequentiallyConsistent(1) + 1;
+    }
+  }
+
+  T operator++(int) {  // Postfix operator.
+    return FetchAndAddSequentiallyConsistent(1);
+  }
+
+  T operator--() {  // Prefix operator.
+    if (sizeof(T) <= 4) {
+      return __sync_sub_and_fetch(&value_, 1);  // Return new value.
+    } else {
+      return FetchAndSubSequentiallyConsistent(1) - 1;
+    }
+  }
+
+  T operator--(int) {  // Postfix operator.
+    return FetchAndSubSequentiallyConsistent(1);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+
+ private:
+  volatile T value_;
+};
+#endif
+
+typedef Atomic<int32_t> AtomicInteger;
+
+COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
+COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
+               atomic_int_alignment_differs_from_that_of_underlying_type);
+COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);
+#if defined(__LP64__)
+  COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
+                 atomic_int64_alignment_differs_from_that_of_underlying_type);
+#endif
+// The above check fails on x86-32, where the ABI requires only 4-byte
+// alignment for 8-byte quantities. This is OK, since we explicitly arrange
+// for 8-byte alignment of 8-byte fields wherever we rely on it.
+
 #if !ART_HAVE_STDATOMIC
 template<typename T>
 inline T Atomic<T>::LoadSequentiallyConsistent() const {
   T result = value_;
-  QuasiAtomic::MembarLoadLoad();
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceAcquire();
+    // We optimistically assume this suffices for store atomicity.
+    // On ARMv8 we strengthen ThreadFenceAcquire to make that true.
+  }
   return result;
 }
 
 template<typename T>
+inline void Atomic<T>::StoreRelease(T desired) {
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceRelease();
+  }
+  StoreRelaxed(desired);
+}
+
+template<typename T>
 inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
-  QuasiAtomic::MembarStoreStore();
-  value_ = desired;
-  QuasiAtomic::MembarStoreLoad();
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceRelease();
+  }
+  StoreRelaxed(desired);
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceSequentiallyConsistent();
+  }
 }
 
 #endif
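
To make the intended call pattern of this interface concrete, here is a minimal
caller-side sketch. It uses only operations declared above; the function name,
constant, and bound are illustrative and not part of the patch:

    // Sketch: a bounded increment built on Atomic<T> (assumes atomic.h above).
    static constexpr int32_t kLimit = 100;  // Illustrative bound.

    // Returns the post-increment value, or the old value if already at kLimit.
    int32_t BoundedIncrement(art::Atomic<int32_t>* counter) {
      int32_t old_value;
      do {
        old_value = counter->LoadRelaxed();
        if (old_value >= kLimit) {
          return old_value;  // Saturated; nothing to store.
        }
        // The weak CAS may fail spuriously, hence the retry loop.
      } while (!counter->CompareExchangeWeakSequentiallyConsistent(old_value,
                                                                   old_value + 1));
      return old_value + 1;
    }
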
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 814195c..caeb946 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -66,6 +66,16 @@
     } \
   } while (false)
 
+// CHECK that can be used in a constexpr function. For example,
+//    constexpr int half(int n) {
+//      return
+//          DCHECK_CONSTEXPR(n >= 0, , 0)
+//          CHECK_CONSTEXPR((n & 1) == 0, << "Extra debugging output: n = " << n, 0)
+//          n / 2;
+//    }
+#define CHECK_CONSTEXPR(x, out, dummy) \
+  (UNLIKELY(!(x))) ? (LOG(FATAL) << "Check failed: " << #x out, dummy) :
+
 #ifndef NDEBUG
 
 #define DCHECK(x) CHECK(x)
@@ -77,6 +87,7 @@
 #define DCHECK_GT(x, y) CHECK_GT(x, y)
 #define DCHECK_STREQ(s1, s2) CHECK_STREQ(s1, s2)
 #define DCHECK_STRNE(s1, s2) CHECK_STRNE(s1, s2)
+#define DCHECK_CONSTEXPR(x, out, dummy) CHECK_CONSTEXPR(x, out, dummy)
 
 #else  // NDEBUG
 
@@ -116,6 +127,9 @@
   while (false) \
     CHECK_STRNE(str1, str2)
 
+#define DCHECK_CONSTEXPR(x, out, dummy) \
+  (false && (x)) ? (dummy) :
+
 #endif
 
 #define LOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, -1).stream()
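
Since a C++11 constexpr function body is restricted to a single return
statement, each CHECK_CONSTEXPR above contributes one arm of a chained
conditional expression rather than a statement. As a hedged illustration, the
corrected half() example from the comment expands roughly to this (macros
expanded by hand, whitespace mine):

    constexpr int half(int n) {
      return
          (UNLIKELY(!(n >= 0)))
              ? (LOG(FATAL) << "Check failed: " << "n >= 0", 0)
              : (UNLIKELY(!((n & 1) == 0)))
                  ? (LOG(FATAL) << "Check failed: " << "(n & 1) == 0"
                                << "Extra debugging output: n = " << n, 0)
                  : n / 2;
    }

The failing arm is never constant-evaluated, so the function remains usable in
constant expressions whenever the checks pass.
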
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index a9472f7..1890181 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -25,6 +25,8 @@
 
 #include "cutils/atomic-inline.h"
 #include "cutils/trace.h"
+
+#include "base/stringprintf.h"
 #include "runtime.h"
 #include "thread.h"
 
@@ -43,54 +45,6 @@
 }
 #endif  // ART_USE_FUTEXES
 
-#if defined(__APPLE__)
-
-// This works on Mac OS 10.6 but hasn't been tested on older releases.
-struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  int padding1;
-  uint32_t padding2;
-  int16_t padding3;
-  int16_t padding4;
-  uint32_t padding5;
-  pthread_t darwin_pthread_mutex_owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  pthread_mutex_t padding1;
-  int padding2;
-  pthread_cond_t padding3;
-  pthread_cond_t padding4;
-  int padding5;
-  int padding6;
-  pthread_t darwin_pthread_rwlock_owner;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __APPLE__
-
-#if defined(__GLIBC__)
-
-struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
-  int32_t padding0[2];
-  int owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
-#ifdef __LP64__
-  int32_t padding0[6];
-#else
-  int32_t padding0[7];
-#endif
-  int writer;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __GLIBC__
-
 class ScopedContentionRecorder {
  public:
   ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
@@ -217,12 +171,14 @@
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
 #endif
+  DCHECK(exclusive_owner_ == 0U || exclusive_owner_ == -1U);
   RegisterAsLocked(self);
   AssertSharedHeld(self);
 }
 
 inline void ReaderWriterMutex::SharedUnlock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
+  DCHECK(exclusive_owner_ == 0U || exclusive_owner_ == -1U);
   AssertSharedHeld(self);
   RegisterAsUnlocked(self);
 #if ART_USE_FUTEXES
@@ -260,26 +216,7 @@
 }
 
 inline uint64_t Mutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
   return exclusive_owner_;
-#elif defined(__BIONIC__)
-  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
-#elif defined(__APPLE__)
-  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
-  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
-  // 0 for unowned, -1 for PTHREAD_MTX_TID_SWITCHING
-  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
-  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
 }
 
 inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
@@ -305,23 +242,7 @@
     return exclusive_owner_;
   }
 #else
-#if defined(__BIONIC__)
-  return rwlock_.writerThreadId;
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
-#elif defined(__APPLE__)
-  const darwin_pthread_rwlock_t*
-      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
-  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
-  if (owner == (pthread_t)0) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
+  return exclusive_owner_;
 #endif
 }
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 11698e2..fd1eb12 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -263,19 +263,11 @@
     : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
 #if ART_USE_FUTEXES
   state_ = 0;
-  exclusive_owner_ = 0;
   DCHECK_EQ(0, num_contenders_.LoadRelaxed());
-#elif defined(__BIONIC__) || defined(__APPLE__)
-  // Use recursive mutexes for bionic and Apple otherwise the
-  // non-recursive mutexes don't have TIDs to check lock ownership of.
-  pthread_mutexattr_t attributes;
-  CHECK_MUTEX_CALL(pthread_mutexattr_init, (&attributes));
-  CHECK_MUTEX_CALL(pthread_mutexattr_settype, (&attributes, PTHREAD_MUTEX_RECURSIVE));
-  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, &attributes));
-  CHECK_MUTEX_CALL(pthread_mutexattr_destroy, (&attributes));
 #else
-  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, NULL));
+  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
 #endif
+  exclusive_owner_ = 0;
 }
 
 Mutex::~Mutex() {
@@ -331,12 +323,16 @@
         num_contenders_--;
       }
     } while (!done);
-    QuasiAtomic::MembarStoreLoad();
+    // No memory fence is needed here: __sync_bool_compare_and_swap already
+    // provides the required ordering.
+    // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation
+    // that exposes the ordering semantics.
     DCHECK_EQ(state_, 1);
-    exclusive_owner_ = SafeGetTid(self);
 #else
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
 #endif
+    DCHECK_EQ(exclusive_owner_, 0U);
+    exclusive_owner_ = SafeGetTid(self);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -364,9 +360,8 @@
         return false;
       }
     } while (!done);
-    QuasiAtomic::MembarStoreLoad();
+    // Again, no memory fence is needed: the CAS provides the ordering.
     DCHECK_EQ(state_, 1);
-    exclusive_owner_ = SafeGetTid(self);
 #else
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
@@ -377,6 +372,8 @@
       PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
     }
 #endif
+    DCHECK_EQ(exclusive_owner_, 0U);
+    exclusive_owner_ = SafeGetTid(self);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -391,6 +388,7 @@
 void Mutex::ExclusiveUnlock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
   AssertHeld(self);
+  DCHECK_NE(exclusive_owner_, 0U);
   recursion_count_--;
   if (!recursive_ || recursion_count_ == 0) {
     if (kDebugLocking) {
@@ -399,35 +397,35 @@
     }
     RegisterAsUnlocked(self);
 #if ART_USE_FUTEXES
-  bool done = false;
-  do {
-    int32_t cur_state = state_;
-    if (LIKELY(cur_state == 1)) {
-      QuasiAtomic::MembarStoreStore();
-      // We're no longer the owner.
-      exclusive_owner_ = 0;
-      // Change state to 0.
-      done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
-      if (LIKELY(done)) {  // Spurious fail?
-        // Wake a contender
-        if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
-          futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+    bool done = false;
+    do {
+      int32_t cur_state = state_;
+      if (LIKELY(cur_state == 1)) {
+        // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
+        // We're no longer the owner.
+        exclusive_owner_ = 0;
+        // Change state to 0.
+        done = __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
+        if (LIKELY(done)) {  // Spurious fail?
+          // Wake a contender
+          if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
+            futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+          }
+        }
+      } else {
+        // Logging acquires the logging lock, avoid infinite recursion in that case.
+        if (this != Locks::logging_lock_) {
+          LOG(FATAL) << "Unexpected state_ in unlock " << cur_state << " for " << name_;
+        } else {
+          LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
+          LogMessage::LogLine(data, StringPrintf("Unexpected state_ %d in unlock for %s",
+                                                 cur_state, name_).c_str());
+          _exit(1);
         }
       }
-    } else {
-      // Logging acquires the logging lock, avoid infinite recursion in that case.
-      if (this != Locks::logging_lock_) {
-        LOG(FATAL) << "Unexpected state_ in unlock " << cur_state << " for " << name_;
-      } else {
-        LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
-        LogMessage::LogLine(data, StringPrintf("Unexpected state_ %d in unlock for %s",
-                                               cur_state, name_).c_str());
-        _exit(1);
-      }
-    }
-  } while (!done);
-  QuasiAtomic::MembarStoreLoad();
+    } while (!done);
 #else
+    exclusive_owner_ = 0;
     CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
 #endif
   }
@@ -450,12 +448,13 @@
 ReaderWriterMutex::ReaderWriterMutex(const char* name, LockLevel level)
     : BaseMutex(name, level)
 #if ART_USE_FUTEXES
-    , state_(0), exclusive_owner_(0), num_pending_readers_(0), num_pending_writers_(0)
+    , state_(0), num_pending_readers_(0), num_pending_writers_(0)
 #endif
 {  // NOLINT(whitespace/braces)
 #if !ART_USE_FUTEXES
-  CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, NULL));
+  CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, nullptr));
 #endif
+  exclusive_owner_ = 0;
 }
 
 ReaderWriterMutex::~ReaderWriterMutex() {
@@ -504,10 +503,11 @@
     }
   } while (!done);
   DCHECK_EQ(state_, -1);
-  exclusive_owner_ = SafeGetTid(self);
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
 #endif
+  DCHECK_EQ(exclusive_owner_, 0U);
+  exclusive_owner_ = SafeGetTid(self);
   RegisterAsLocked(self);
   AssertExclusiveHeld(self);
 }
@@ -516,6 +516,7 @@
   DCHECK(self == NULL || self == Thread::Current());
   AssertExclusiveHeld(self);
   RegisterAsUnlocked(self);
+  DCHECK_NE(exclusive_owner_, 0U);
 #if ART_USE_FUTEXES
   bool done = false;
   do {
@@ -536,6 +537,7 @@
     }
   } while (!done);
 #else
+  exclusive_owner_ = 0;
   CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
 #endif
 }
@@ -576,7 +578,6 @@
       num_pending_writers_--;
     }
   } while (!done);
-  exclusive_owner_ = SafeGetTid(self);
 #else
   timespec ts;
   InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &ts);
@@ -589,6 +590,7 @@
     PLOG(FATAL) << "pthread_rwlock_timedwrlock failed for " << name_;
   }
 #endif
+  exclusive_owner_ = SafeGetTid(self);
   RegisterAsLocked(self);
   AssertSharedHeld(self);
   return true;
@@ -654,7 +656,7 @@
   num_waiters_ = 0;
 #else
   pthread_condattr_t cond_attrs;
-  CHECK_MUTEX_CALL(pthread_condattr_init(&cond_attrs));
+  CHECK_MUTEX_CALL(pthread_condattr_init, (&cond_attrs));
 #if !defined(__APPLE__)
   // Apple doesn't have CLOCK_MONOTONIC or pthread_condattr_setclock.
-  CHECK_MUTEX_CALL(pthread_condattr_setclock(&cond_attrs, CLOCK_MONOTONIC));
+  CHECK_MUTEX_CALL(pthread_condattr_setclock, (&cond_attrs, CLOCK_MONOTONIC));
@@ -761,8 +763,11 @@
   CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
+  uint64_t old_owner = guard_.exclusive_owner_;
+  guard_.exclusive_owner_ = 0;
   guard_.recursion_count_ = 0;
   CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &guard_.mutex_));
+  guard_.exclusive_owner_ = old_owner;
 #endif
   guard_.recursion_count_ = old_recursion_count;
 }
@@ -802,6 +807,8 @@
 #else
   int clock = CLOCK_REALTIME;
 #endif
+  uint64_t old_owner = guard_.exclusive_owner_;
+  guard_.exclusive_owner_ = 0;
   guard_.recursion_count_ = 0;
   timespec ts;
   InitTimeSpec(true, clock, ms, ns, &ts);
@@ -810,6 +817,7 @@
     errno = rc;
     PLOG(FATAL) << "TimedWait failed for " << name_;
   }
+  guard_.exclusive_owner_ = old_owner;
 #endif
   guard_.recursion_count_ = old_recursion_count;
 }
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 68b450a..1ba6180 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -245,6 +245,7 @@
   AtomicInteger num_contenders_;
 #else
   pthread_mutex_t mutex_;
+  volatile uint64_t exclusive_owner_;  // Guarded by mutex_.
 #endif
   const bool recursive_;  // Can the lock be recursively held?
   unsigned int recursion_count_;
@@ -358,6 +359,7 @@
   AtomicInteger num_pending_writers_;
 #else
   pthread_rwlock_t rwlock_;
+  volatile uint64_t exclusive_owner_;  // Guarded by rwlock_.
 #endif
   DISALLOW_COPY_AND_ASSIGN(ReaderWriterMutex);
 };
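
Taken together, the mutex changes replace the platform-specific owner probing
deleted from mutex-inl.h with one uniform rule: apart from initialization,
exclusive_owner_ is written only while the underlying lock is held. Condensed
to its essentials for the !ART_USE_FUTEXES path (a restatement of the code
above, not a drop-in implementation):

    void Mutex::ExclusiveLock(Thread* self) {
      CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
      DCHECK_EQ(exclusive_owner_, 0U);      // Unowned before we take it.
      exclusive_owner_ = SafeGetTid(self);  // Guarded by mutex_ itself.
    }

    void Mutex::ExclusiveUnlock(Thread* self) {
      DCHECK_NE(exclusive_owner_, 0U);
      exclusive_owner_ = 0;                 // Clear while still holding the lock.
      CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
    }

ConditionVariable::Wait and TimedWait follow the same rule by saving the owner,
zeroing it across pthread_cond_wait (which releases the mutex internally), and
restoring it afterwards.
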
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
new file mode 100644
index 0000000..c0bce84
--- /dev/null
+++ b/runtime/base/scoped_flock.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_flock.h"
+
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "base/logging.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+
+namespace art {
+
+bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  while (true) {
+    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    if (file_.get() == NULL) {
+      *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
+    if (flock_result != 0) {
+      *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    struct stat fstat_stat;
+    int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat));
+    if (fstat_result != 0) {
+      *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno));
+      return false;
+    }
+    struct stat stat_stat;
+    int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat));
+    if (stat_result != 0) {
+      PLOG(WARNING) << "Failed to stat, will retry: " << filename;
+      // ENOENT can happen if someone racing with us unlinks the file we created, so just retry.
+      continue;
+    }
+    if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
+      LOG(WARNING) << "File changed while locking, will retry: " << filename;
+      continue;
+    }
+    return true;
+  }
+}
+
+File* ScopedFlock::GetFile() {
+  CHECK(file_.get() != NULL);
+  return file_.get();
+}
+
+ScopedFlock::ScopedFlock() { }
+
+ScopedFlock::~ScopedFlock() {
+  if (file_.get() != NULL) {
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
+    CHECK_EQ(0, flock_result);
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
new file mode 100644
index 0000000..26b4eb0
--- /dev/null
+++ b/runtime/base/scoped_flock.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_SCOPED_FLOCK_H_
+#define ART_RUNTIME_BASE_SCOPED_FLOCK_H_
+
+#include <memory>
+#include <string>
+
+#include "base/macros.h"
+#include "os.h"
+
+namespace art {
+
+class ScopedFlock {
+ public:
+  ScopedFlock();
+
+  // Attempts to acquire an exclusive file lock (see flock(2)) on the file
+  // at filename, creating the file if it does not exist, and blocks until
+  // it can do so.
+  //
+  // Returns true once the lock is held, or false if the file could not be
+  // opened, locked, or fstat(2)ed. If the file's inode changes while the
+  // lock is being acquired (usually because a new file was created at the
+  // same path), locking is retried against the new file.
+  bool Init(const char* filename, std::string* error_msg);
+
+  // Returns the (locked) file associated with this instance.
+  File* GetFile();
+  ~ScopedFlock();
+ private:
+  std::unique_ptr<File> file_;
+  DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_SCOPED_FLOCK_H_
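
A minimal usage sketch of the class as declared above; the path and the work
done while the lock is held are hypothetical:

    std::string error_msg;
    ScopedFlock lock;
    if (!lock.Init("/tmp/example.oat", &error_msg)) {
      LOG(ERROR) << "Failed to lock file: " << error_msg;
      return;
    }
    File* file = lock.GetFile();  // Locked for the lifetime of `lock`.
    // ... inspect or regenerate the file contents via file->Fd() ...
    // The destructor releases the lock with flock(fd, LOCK_UN).
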
diff --git a/runtime/base/scoped_flock_test.cc b/runtime/base/scoped_flock_test.cc
new file mode 100644
index 0000000..8fa181a
--- /dev/null
+++ b/runtime/base/scoped_flock_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_flock.h"
+#include "common_runtime_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class ScopedFlockTest : public CommonRuntimeTest {};
+
+TEST_F(ScopedFlockTest, TestLocking) {
+  ScratchFile scratch_file;
+  std::string error_msg;
+
+  // NOTE: Locks applied using flock(2) and fcntl(2) are oblivious
+  // to each other, so querying a lock set by flock(2) via fcntl(2)
+  // with F_GETLK will not work. See the kernel documentation in
+  // Documentation/filesystems/locks.txt.
+  ScopedFlock file_lock;
+  ASSERT_TRUE(file_lock.Init(scratch_file.GetFilename().c_str(),
+                             &error_msg));
+
+  ASSERT_FALSE(file_lock.Init("/guaranteed/not/to/exist", &error_msg));
+}
+
+}  // namespace art
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 87d1c06..6d5b59c 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -69,17 +69,29 @@
 }
 
 int FdFile::Flush() {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(fdatasync(fd_));
+#else
+  int rc = TEMP_FAILURE_RETRY(fsync(fd_));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
 int64_t FdFile::Read(char* buf, int64_t byte_count, int64_t offset) const {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pread64(fd_, buf, byte_count, offset));
+#else
+  int rc = TEMP_FAILURE_RETRY(pread(fd_, buf, byte_count, offset));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
 int FdFile::SetLength(int64_t new_length) {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(ftruncate64(fd_, new_length));
+#else
+  int rc = TEMP_FAILURE_RETRY(ftruncate(fd_, new_length));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
@@ -90,7 +102,11 @@
 }
 
 int64_t FdFile::Write(const char* buf, int64_t byte_count, int64_t offset) {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pwrite64(fd_, buf, byte_count, offset));
+#else
+  int rc = TEMP_FAILURE_RETRY(pwrite(fd_, buf, byte_count, offset));
+#endif
   return (rc == -1) ? -errno : rc;
 }
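
Each of these hunks applies the same recipe: use the explicit 64-bit-offset
call on Linux, fall back to the plain POSIX call elsewhere, retry on EINTR via
TEMP_FAILURE_RETRY, and map failure to -errno. For illustration only, a
hypothetical wrapper in the same style (SeekSet is not part of the patch, and
lseek64 is assumed visible just as pread64/ftruncate64 are above):

    // Hypothetical: absolute seek returning the new offset or -errno.
    int64_t SeekSet(int fd, int64_t offset) {
    #ifdef __linux__
      off64_t rc = TEMP_FAILURE_RETRY(lseek64(fd, offset, SEEK_SET));
    #else
      off_t rc = TEMP_FAILURE_RETRY(lseek(fd, offset, SEEK_SET));
    #endif
      return (rc == -1) ? -errno : rc;
    }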
 
diff --git a/runtime/base/unix_file/mapped_file.cc b/runtime/base/unix_file/mapped_file.cc
index bc23a74..63927b1 100644
--- a/runtime/base/unix_file/mapped_file.cc
+++ b/runtime/base/unix_file/mapped_file.cc
@@ -61,7 +61,11 @@
 bool MappedFile::MapReadWrite(int64_t file_size) {
   CHECK(IsOpened());
   CHECK(!IsMapped());
+#ifdef __linux__
   int result = TEMP_FAILURE_RETRY(ftruncate64(Fd(), file_size));
+#else
+  int result = TEMP_FAILURE_RETRY(ftruncate(Fd(), file_size));
+#endif
   if (result == -1) {
     PLOG(ERROR) << "Failed to truncate file '" << GetPath()
                 << "' to size " << file_size;
diff --git a/runtime/base/unix_file/mapped_file.h b/runtime/base/unix_file/mapped_file.h
index 28cc551..73056e9 100644
--- a/runtime/base/unix_file/mapped_file.h
+++ b/runtime/base/unix_file/mapped_file.h
@@ -32,8 +32,13 @@
  public:
   // File modes used in Open().
   enum FileMode {
+#ifdef __linux__
     kReadOnlyMode = O_RDONLY | O_LARGEFILE,
     kReadWriteMode = O_CREAT | O_RDWR | O_LARGEFILE,
+#else
+    kReadOnlyMode = O_RDONLY,
+    kReadWriteMode = O_CREAT | O_RDWR,
+#endif
   };
 
   MappedFile() : FdFile(), file_size_(-1), mapped_file_(NULL) {
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index cfd0c00..a816489 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -129,7 +129,7 @@
   // such as NewByteArray.
   // If -verbose:third-party-jni is on, we want to log any JNI function calls
   // made by a third-party native method.
-  std::string class_name(MethodHelper(method).GetDeclaringClassDescriptor());
+  std::string class_name(method->GetDeclaringClassDescriptor());
   if (!vm->trace.empty() && class_name.find(vm->trace) != std::string::npos) {
     return true;
   }
@@ -195,8 +195,9 @@
    */
   void CheckFieldType(jvalue value, jfieldID fid, char prim, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtField* f = CheckFieldID(fid);
-    if (f == nullptr) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::ArtField> f(hs.NewHandle(CheckFieldID(fid)));
+    if (f.Get() == nullptr) {
       return;
     }
     mirror::Class* field_type = FieldHelper(f).GetType();
@@ -215,22 +216,24 @@
         } else {
           if (!obj->InstanceOf(field_type)) {
             JniAbortF(function_name_, "attempt to set field %s with value of wrong type: %s",
-                      PrettyField(f).c_str(), PrettyTypeOf(obj).c_str());
+                      PrettyField(f.Get()).c_str(), PrettyTypeOf(obj).c_str());
             return;
           }
         }
       }
     } else if (field_type != Runtime::Current()->GetClassLinker()->FindPrimitiveClass(prim)) {
       JniAbortF(function_name_, "attempt to set field %s with value of wrong type: %c",
-                PrettyField(f).c_str(), prim);
+                PrettyField(f.Get()).c_str(), prim);
       return;
     }
 
-    if (isStatic != f->IsStatic()) {
+    if (isStatic != f.Get()->IsStatic()) {
       if (isStatic) {
-        JniAbortF(function_name_, "accessing non-static field %s as static", PrettyField(f).c_str());
+        JniAbortF(function_name_, "accessing non-static field %s as static",
+                  PrettyField(f.Get()).c_str());
       } else {
-        JniAbortF(function_name_, "accessing static field %s as non-static", PrettyField(f).c_str());
+        JniAbortF(function_name_, "accessing static field %s as non-static",
+                  PrettyField(f.Get()).c_str());
       }
       return;
     }
@@ -256,8 +259,7 @@
       return;
     }
     mirror::Class* c = o->GetClass();
-    FieldHelper fh(f);
-    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == nullptr) {
+    if (c->FindInstanceField(f->GetName(), f->GetTypeDescriptor()) == nullptr) {
       JniAbortF(function_name_, "jfieldID %s not valid for an object of class %s",
                 PrettyField(f).c_str(), PrettyTypeOf(o).c_str());
     }
@@ -282,7 +284,7 @@
     if (m == nullptr) {
       return;
     }
-    if (*expectedType != MethodHelper(m).GetShorty()[0]) {
+    if (*expectedType != m->GetShorty()[0]) {
       JniAbortF(function_name_, "the return type of %s does not match %s",
                 function_name_, PrettyMethod(m).c_str());
     }
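
The recurring shape of these check_jni.cc edits, applied at larger scale in
class_linker.cc below, is to pin a raw mirror pointer in a Handle before any
operation that can suspend the thread, so that a moving garbage collector can
relocate the object safely. Reduced to a sketch, where CanSuspend() and Use()
are placeholders rather than real ART functions:

    void Example(Thread* self, mirror::ArtField* raw_field)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      StackHandleScope<1> hs(self);  // Capacity for one handle.
      Handle<mirror::ArtField> field(hs.NewHandle(raw_field));
      CanSuspend();                  // GC may move the object here.
      Use(field.Get());              // The handle yields the updated pointer.
    }

After the suspension point raw_field may be stale, while field.Get() is kept
current through the handle scope.
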
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b9c42ee..7385382 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -27,6 +27,7 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
@@ -502,29 +503,24 @@
       FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
 
   mirror::ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
-  FieldHelper fh(pendingNext);
-  CHECK_STREQ(fh.GetName(), "pendingNext");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
+  CHECK_STREQ(pendingNext->GetName(), "pendingNext");
+  CHECK_STREQ(pendingNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
   mirror::ArtField* queue = java_lang_ref_Reference->GetInstanceField(1);
-  fh.ChangeField(queue);
-  CHECK_STREQ(fh.GetName(), "queue");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/ReferenceQueue;");
+  CHECK_STREQ(queue->GetName(), "queue");
+  CHECK_STREQ(queue->GetTypeDescriptor(), "Ljava/lang/ref/ReferenceQueue;");
 
   mirror::ArtField* queueNext = java_lang_ref_Reference->GetInstanceField(2);
-  fh.ChangeField(queueNext);
-  CHECK_STREQ(fh.GetName(), "queueNext");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
+  CHECK_STREQ(queueNext->GetName(), "queueNext");
+  CHECK_STREQ(queueNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
   mirror::ArtField* referent = java_lang_ref_Reference->GetInstanceField(3);
-  fh.ChangeField(referent);
-  CHECK_STREQ(fh.GetName(), "referent");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/Object;");
+  CHECK_STREQ(referent->GetName(), "referent");
+  CHECK_STREQ(referent->GetTypeDescriptor(), "Ljava/lang/Object;");
 
   mirror::ArtField* zombie = java_lang_ref_FinalizerReference->GetInstanceField(2);
-  fh.ChangeField(zombie);
-  CHECK_STREQ(fh.GetName(), "zombie");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/Object;");
+  CHECK_STREQ(zombie->GetName(), "zombie");
+  CHECK_STREQ(zombie->GetTypeDescriptor(), "Ljava/lang/Object;");
 
   // ensure all class_roots_ are initialized
   for (size_t i = 0; i < kClassRootsMax; i++) {
@@ -706,60 +702,6 @@
   return dex_file;
 }
 
-class ScopedFlock {
- public:
-  ScopedFlock() {}
-
-  bool Init(const char* filename, std::string* error_msg) {
-    while (true) {
-      file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
-      if (file_.get() == NULL) {
-        *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
-      if (flock_result != 0) {
-        *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      struct stat fstat_stat;
-      int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat));
-      if (fstat_result != 0) {
-        *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno));
-        return false;
-      }
-      struct stat stat_stat;
-      int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat));
-      if (stat_result != 0) {
-        PLOG(WARNING) << "Failed to stat, will retry: " << filename;
-        // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-        continue;
-      }
-      if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
-        LOG(WARNING) << "File changed while locking, will retry: " << filename;
-        continue;
-      }
-      return true;
-    }
-  }
-
-  File& GetFile() {
-    return *file_;
-  }
-
-  ~ScopedFlock() {
-    if (file_.get() != NULL) {
-      int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
-      CHECK_EQ(0, flock_result);
-    }
-  }
-
- private:
-  std::unique_ptr<File> file_;
-
-  DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
-};
-
 const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(
     const char* dex_location,
     uint32_t dex_location_checksum,
@@ -790,7 +732,7 @@
 
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
-  if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, &error_msg)) {
+  if (!GenerateOatFile(dex_location, scoped_flock.GetFile()->Fd(), oat_location, &error_msg)) {
     CHECK(!error_msg.empty());
     error_msgs->push_back(error_msg);
     return nullptr;
@@ -2812,7 +2754,7 @@
 }
 
 static void CheckProxyConstructor(mirror::ArtMethod* constructor);
-static void CheckProxyMethod(mirror::ArtMethod* method,
+static void CheckProxyMethod(Handle<mirror::ArtMethod> method,
                              Handle<mirror::ArtMethod> prototype);
 
 mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
@@ -2934,11 +2876,12 @@
     CHECK(klass->GetIFields() == nullptr);
     CheckProxyConstructor(klass->GetDirectMethod(0));
     for (size_t i = 0; i < num_virtual_methods; ++i) {
-      StackHandleScope<1> hs(self);
+      StackHandleScope<2> hs(self);
       mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
           soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
       Handle<mirror::ArtMethod> prototype(hs.NewHandle(decoded_methods->Get(i)));
-      CheckProxyMethod(klass->GetVirtualMethod(i), prototype);
+      Handle<mirror::ArtMethod> virtual_method(hs.NewHandle(klass->GetVirtualMethod(i)));
+      CheckProxyMethod(virtual_method, prototype);
     }
 
     mirror::String* decoded_name = soa.Decode<mirror::String*>(name);
@@ -3019,9 +2962,9 @@
 static void CheckProxyConstructor(mirror::ArtMethod* constructor)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   CHECK(constructor->IsConstructor());
-  MethodHelper mh(constructor);
-  CHECK_STREQ(mh.GetName(), "<init>");
-  CHECK_STREQ(mh.GetSignature().ToString().c_str(), "(Ljava/lang/reflect/InvocationHandler;)V");
+  CHECK_STREQ(constructor->GetName(), "<init>");
+  CHECK_STREQ(constructor->GetSignature().ToString().c_str(),
+              "(Ljava/lang/reflect/InvocationHandler;)V");
   DCHECK(constructor->IsPublic());
 }
 
@@ -3054,7 +2997,7 @@
   return method;
 }
 
-static void CheckProxyMethod(mirror::ArtMethod* method, Handle<mirror::ArtMethod> prototype)
+static void CheckProxyMethod(Handle<mirror::ArtMethod> method, Handle<mirror::ArtMethod> prototype)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Basic sanity
   CHECK(!prototype->IsFinal());
@@ -3069,9 +3012,9 @@
   CHECK_EQ(prototype->GetDexMethodIndex(), method->GetDexMethodIndex());
 
   MethodHelper mh(method);
-  MethodHelper mh2(prototype.Get());
-  CHECK_STREQ(mh.GetName(), mh2.GetName());
-  CHECK_STREQ(mh.GetShorty(), mh2.GetShorty());
+  MethodHelper mh2(prototype);
+  CHECK_STREQ(method->GetName(), prototype->GetName());
+  CHECK_STREQ(method->GetShorty(), prototype->GetShorty());
   // More complex sanity - via dex cache
   CHECK_EQ(mh.GetReturnType(), mh2.GetReturnType());
 }
@@ -3326,16 +3269,18 @@
     return true;
   }
   // Begin with the methods local to the superclass.
-  MethodHelper mh;
-  MethodHelper super_mh;
+  StackHandleScope<2> hs(Thread::Current());
+  MethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+  MethodHelper super_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
     for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
       mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
       super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
-      bool is_override = mh.GetMethod() != super_mh.GetMethod();
-      if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
-        ThrowLinkageError(klass.Get(), "Class %s method %s resolves differently in superclass %s",
+      if (mh.GetMethod() != super_mh.GetMethod() &&
+          !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
+        ThrowLinkageError(klass.Get(),
+                          "Class %s method %s resolves differently in superclass %s",
                           PrettyDescriptor(klass.Get()).c_str(),
                           PrettyMethod(mh.GetMethod()).c_str(),
                           PrettyDescriptor(klass->GetSuperClass()).c_str());
@@ -3349,9 +3294,10 @@
       for (uint32_t j = 0; j < num_methods; ++j) {
         mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
         super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
-        bool is_override = mh.GetMethod() != super_mh.GetMethod();
-        if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
-          ThrowLinkageError(klass.Get(), "Class %s method %s resolves differently in interface %s",
+        if (mh.GetMethod() != super_mh.GetMethod() &&
+            !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
+          ThrowLinkageError(klass.Get(),
+                            "Class %s method %s resolves differently in interface %s",
                             PrettyDescriptor(klass.Get()).c_str(),
                             PrettyMethod(mh.GetMethod()).c_str(),
                             PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
@@ -3393,7 +3339,7 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  if (!LinkMethods(klass, interfaces)) {
+  if (!LinkMethods(self, klass, interfaces)) {
     return false;
   }
   if (!LinkInstanceFields(klass)) {
@@ -3512,7 +3458,7 @@
 }
 
 // Populate the class vtable and itable. Compute return type indices.
-bool ClassLinker::LinkMethods(Handle<mirror::Class> klass,
+bool ClassLinker::LinkMethods(Thread* self, Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
@@ -3528,20 +3474,19 @@
     return LinkInterfaceMethods(klass, interfaces);
   } else {
     // Link virtual and interface method tables
-    return LinkVirtualMethods(klass) && LinkInterfaceMethods(klass, interfaces);
+    return LinkVirtualMethods(self, klass) && LinkInterfaceMethods(klass, interfaces);
   }
   return true;
 }
 
-bool ClassLinker::LinkVirtualMethods(Handle<mirror::Class> klass) {
-  Thread* self = Thread::Current();
+bool ClassLinker::LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass) {
   if (klass->HasSuperClass()) {
-    uint32_t max_count = (klass->NumVirtualMethods() +
-                          klass->GetSuperClass()->GetVTable()->GetLength());
+    uint32_t max_count = klass->NumVirtualMethods() +
+        klass->GetSuperClass()->GetVTable()->GetLength();
     size_t actual_count = klass->GetSuperClass()->GetVTable()->GetLength();
     CHECK_LE(actual_count, max_count);
     // TODO: do not assign to the vtable field until it is fully constructed.
-    StackHandleScope<1> hs(self);
+    StackHandleScope<3> hs(self);
     Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
         hs.NewHandle(klass->GetSuperClass()->GetVTable()->CopyOf(self, max_count)));
     if (UNLIKELY(vtable.Get() == NULL)) {
@@ -3549,20 +3494,22 @@
       return false;
     }
     // See if any of our virtual methods override the superclass.
+    MethodHelper local_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+    MethodHelper super_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
     for (size_t i = 0; i < klass->NumVirtualMethods(); ++i) {
       mirror::ArtMethod* local_method = klass->GetVirtualMethodDuringLinking(i);
-      MethodHelper local_mh(local_method);
+      local_mh.ChangeMethod(local_method);
       size_t j = 0;
       for (; j < actual_count; ++j) {
         mirror::ArtMethod* super_method = vtable->Get(j);
-        MethodHelper super_mh(super_method);
+        super_mh.ChangeMethod(super_method);
         if (local_mh.HasSameNameAndSignature(&super_mh)) {
           if (klass->CanAccessMember(super_method->GetDeclaringClass(),
                                      super_method->GetAccessFlags())) {
             if (super_method->IsFinal()) {
               ThrowLinkageError(klass.Get(), "Method %s overrides final method in class %s",
                                 PrettyMethod(local_method).c_str(),
-                                super_mh.GetDeclaringClassDescriptor());
+                                super_method->GetDeclaringClassDescriptor());
               return false;
             }
             vtable->Set<false>(j, local_method);
@@ -3571,7 +3518,7 @@
           } else {
             LOG(WARNING) << "Before Android 4.1, method " << PrettyMethod(local_method)
                          << " would have incorrectly overridden the package-private method in "
-                         << PrettyDescriptor(super_mh.GetDeclaringClassDescriptor());
+                         << PrettyDescriptor(super_method->GetDeclaringClassDescriptor());
           }
         }
       }
@@ -3597,7 +3544,7 @@
     }
     klass->SetVTable(vtable.Get());
   } else {
-    CHECK(klass.Get() == GetClassRoot(kJavaLangObject));
+    CHECK_EQ(klass.Get(), GetClassRoot(kJavaLangObject));
     uint32_t num_virtual_methods = klass->NumVirtualMethods();
     if (!IsUint(16, num_virtual_methods)) {
       ThrowClassFormatError(klass.Get(), "Too many methods: %d", num_virtual_methods);
@@ -3623,8 +3570,9 @@
 bool ClassLinker::LinkInterfaceMethods(Handle<mirror::Class> klass,
                                        Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   Thread* const self = Thread::Current();
+  Runtime* const runtime = Runtime::Current();
   // Set the imt table to be all conflicts by default.
-  klass->SetImTable(Runtime::Current()->GetDefaultImt());
+  klass->SetImTable(runtime->GetDefaultImt());
   size_t super_ifcount;
   if (klass->HasSuperClass()) {
     super_ifcount = klass->GetSuperClass()->GetIfTableCount();
@@ -3662,7 +3610,7 @@
       return true;
     }
   }
-  StackHandleScope<2> hs(self);
+  StackHandleScope<4> hs(self);
   Handle<mirror::IfTable> iftable(hs.NewHandle(AllocIfTable(self, ifcount)));
   if (UNLIKELY(iftable.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
@@ -3742,6 +3690,8 @@
     CHECK(self->IsExceptionPending());  // OOME.
     return false;
   }
+  MethodHelper interface_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+  MethodHelper vtable_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
   std::vector<mirror::ArtMethod*> miranda_list;
   for (size_t i = 0; i < ifcount; ++i) {
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
@@ -3758,7 +3708,7 @@
           hs.NewHandle(klass->GetVTableDuringLinking()));
       for (size_t j = 0; j < num_methods; ++j) {
         mirror::ArtMethod* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j);
-        MethodHelper interface_mh(interface_method);
+        interface_mh.ChangeMethod(interface_method);
         int32_t k;
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -3770,7 +3720,7 @@
         // matter which direction we go.  We walk it backward anyway.)
         for (k = vtable->GetLength() - 1; k >= 0; --k) {
           mirror::ArtMethod* vtable_method = vtable->Get(k);
-          MethodHelper vtable_mh(vtable_method);
+          vtable_mh.ChangeMethod(vtable_method);
           if (interface_mh.HasSameNameAndSignature(&vtable_mh)) {
             if (!vtable_method->IsAbstract() && !vtable_method->IsPublic()) {
               ThrowIllegalAccessError(
@@ -3787,7 +3737,7 @@
               imtable->Set<false>(imt_index, vtable_method);
               imtable_changed = true;
             } else {
-              imtable->Set<false>(imt_index, Runtime::Current()->GetImtConflictMethod());
+              imtable->Set<false>(imt_index, runtime->GetImtConflictMethod());
             }
             break;
           }
@@ -3795,11 +3745,10 @@
         if (k < 0) {
           StackHandleScope<1> hs(self);
           auto miranda_method = hs.NewHandle<mirror::ArtMethod>(nullptr);
-          for (size_t mir = 0; mir < miranda_list.size(); mir++) {
-            mirror::ArtMethod* mir_method = miranda_list[mir];
-            MethodHelper vtable_mh(mir_method);
+          for (mirror::ArtMethod* mir_method : miranda_list) {
+            vtable_mh.ChangeMethod(mir_method);
             if (interface_mh.HasSameNameAndSignature(&vtable_mh)) {
-              miranda_method.Assign(miranda_list[mir]);
+              miranda_method.Assign(mir_method);
               break;
             }
           }
@@ -3820,7 +3769,7 @@
   }
   if (imtable_changed) {
     // Fill in empty entries in interface method table with conflict.
-    mirror::ArtMethod* imt_conflict_method = Runtime::Current()->GetImtConflictMethod();
+    mirror::ArtMethod* imt_conflict_method = runtime->GetImtConflictMethod();
     for (size_t i = 0; i < kImtSize; i++) {
       if (imtable->Get(i) == NULL) {
         imtable->Set<false>(i, imt_conflict_method);
@@ -3896,10 +3845,8 @@
   bool operator()(mirror::ArtField* field1, mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
-    FieldHelper fh1(field1);
-    Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
-    FieldHelper fh2(field2);
-    Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
+    Primitive::Type type1 = field1->GetTypeAsPrimitiveType();
+    Primitive::Type type2 = field2->GetTypeAsPrimitiveType();
     if (type1 != type2) {
       bool is_primitive1 = type1 != Primitive::kPrimNot;
       bool is_primitive2 = type2 != Primitive::kPrimNot;
@@ -3914,9 +3861,7 @@
       }
     }
     // same basic group? then sort by string.
-    const char* name1 = fh1.GetName();
-    const char* name2 = fh2.GetName();
-    return strcmp(name1, name2) < 0;
+    return strcmp(field1->GetName(), field2->GetName()) < 0;
   }
 };
 
@@ -3961,8 +3906,7 @@
   size_t num_reference_fields = 0;
   for (; current_field < num_fields; current_field++) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
-    FieldHelper fh(field);
-    Primitive::Type type = fh.GetTypeAsPrimitiveType();
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
     bool isPrimitive = type != Primitive::kPrimNot;
     if (isPrimitive) {
       break;  // past last reference, move on to the next phase
@@ -3980,8 +3924,7 @@
   if (current_field != num_fields && !IsAligned<8>(field_offset.Uint32Value())) {
     for (size_t i = 0; i < grouped_and_sorted_fields.size(); i++) {
       mirror::ArtField* field = grouped_and_sorted_fields[i];
-      FieldHelper fh(field);
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = field->GetTypeAsPrimitiveType();
       CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
         continue;
@@ -4003,8 +3946,7 @@
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
-    FieldHelper fh(field);
-    Primitive::Type type = fh.GetTypeAsPrimitiveType();
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
     CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
     fields->Set<false>(current_field, field);
     field->SetOffset(field_offset);
@@ -4020,8 +3962,7 @@
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
-    FieldHelper fh(fields->Get(num_fields - 1));
-    CHECK_STREQ(fh.GetName(), "referent") << PrettyClass(klass.Get());
+    CHECK_STREQ(fields->Get(num_fields - 1)->GetName(), "referent") << PrettyClass(klass.Get());
     --num_reference_fields;
   }
 
@@ -4038,11 +3979,10 @@
                     << " offset="
                     << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()));
       }
-      FieldHelper fh(field);
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = field->GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
       if (klass->DescriptorEquals("Ljava/lang/ref/Reference;") &&
-          strcmp("referent", fh.GetName()) == 0) {
+          strcmp("referent", field->GetName()) == 0) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
       if (is_primitive) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index ccf0558..a1d7bc6 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -499,11 +499,11 @@
   bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkMethods(Handle<mirror::Class> klass,
+  bool LinkMethods(Thread* self, Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkVirtualMethods(Handle<mirror::Class> klass)
+  bool LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkInterfaceMethods(Handle<mirror::Class> klass,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index e397a5c..04f6946 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -152,11 +152,10 @@
   }
 
   void AssertMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    MethodHelper mh(method);
     EXPECT_TRUE(method != NULL);
     EXPECT_TRUE(method->GetClass() != NULL);
-    EXPECT_TRUE(mh.GetName() != NULL);
-    EXPECT_TRUE(mh.GetSignature() != Signature::NoSignature());
+    EXPECT_TRUE(method->GetName() != NULL);
+    EXPECT_TRUE(method->GetSignature() != Signature::NoSignature());
 
     EXPECT_TRUE(method->GetDexCacheStrings() != NULL);
     EXPECT_TRUE(method->GetDexCacheResolvedMethods() != NULL);
@@ -171,11 +170,12 @@
 
   void AssertField(mirror::Class* klass, mirror::ArtField* field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    FieldHelper fh(field);
     EXPECT_TRUE(field != NULL);
     EXPECT_TRUE(field->GetClass() != NULL);
     EXPECT_EQ(klass, field->GetDeclaringClass());
-    EXPECT_TRUE(fh.GetName() != NULL);
+    EXPECT_TRUE(field->GetName() != NULL);
+    StackHandleScope<1> hs(Thread::Current());
+    FieldHelper fh(hs.NewHandle(field));
     EXPECT_TRUE(fh.GetType() != NULL);
   }
 
@@ -202,8 +202,7 @@
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
       if (klass->NumDirectMethods() == 1) {
-        MethodHelper mh(klass->GetDirectMethod(0));
-        EXPECT_TRUE(mh.IsClassInitializer());
+        EXPECT_TRUE(klass->GetDirectMethod(0)->IsClassInitializer());
         EXPECT_TRUE(klass->GetDirectMethod(0)->IsDirect());
       } else {
         EXPECT_EQ(0U, klass->NumDirectMethods());
@@ -269,11 +268,12 @@
 
     // Confirm that all instances fields are packed together at the start
     EXPECT_GE(klass->NumInstanceFields(), klass->NumReferenceInstanceFields());
-    FieldHelper fh;
+    StackHandleScope<1> hs(Thread::Current());
+    FieldHelper fh(hs.NewHandle<mirror::ArtField>(nullptr));
     for (size_t i = 0; i < klass->NumReferenceInstanceFields(); i++) {
       mirror::ArtField* field = klass->GetInstanceField(i);
       fh.ChangeField(field);
-      ASSERT_TRUE(!fh.IsPrimitiveType());
+      ASSERT_TRUE(!field->IsPrimitiveType());
       mirror::Class* field_type = fh.GetType();
       ASSERT_TRUE(field_type != NULL);
       ASSERT_TRUE(!field_type->IsPrimitive());
@@ -283,10 +283,10 @@
       fh.ChangeField(field);
       mirror::Class* field_type = fh.GetType();
       ASSERT_TRUE(field_type != NULL);
-      if (!fh.IsPrimitiveType() || !field_type->IsPrimitive()) {
+      if (!fh.GetField()->IsPrimitiveType() || !field_type->IsPrimitive()) {
         // While Reference.referent is not primitive, the ClassLinker
         // treats it as such so that the garbage collector won't scan it.
-        EXPECT_EQ(PrettyField(field), "java.lang.Object java.lang.ref.Reference.referent");
+        EXPECT_EQ(PrettyField(fh.GetField()), "java.lang.Object java.lang.ref.Reference.referent");
       }
     }
 
@@ -390,11 +390,9 @@
       error = true;
     }
 
-    FieldHelper fh;
     for (size_t i = 0; i < offsets.size(); i++) {
       mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
-      fh.ChangeField(field);
-      StringPiece field_name(fh.GetName());
+      StringPiece field_name(field->GetName());
       if (field_name != offsets[i].java_name) {
         error = true;
       }
@@ -403,8 +401,7 @@
       for (size_t i = 0; i < offsets.size(); i++) {
         CheckOffset& offset = offsets[i];
         mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
-        fh.ChangeField(field);
-        StringPiece field_name(fh.GetName());
+        StringPiece field_name(field->GetName());
         if (field_name != offsets[i].java_name) {
           LOG(ERROR) << "JAVA FIELD ORDER MISMATCH NEXT LINE:";
         }
@@ -731,15 +728,11 @@
   } else {
     EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
   }
-  FieldHelper fh(JavaLangObject->GetInstanceField(0));
-  EXPECT_STREQ(fh.GetName(), "shadow$_klass_");
-  fh.ChangeField(JavaLangObject->GetInstanceField(1));
-  EXPECT_STREQ(fh.GetName(), "shadow$_monitor_");
+  EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
+  EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
   if (kUseBakerOrBrooksReadBarrier) {
-    fh.ChangeField(JavaLangObject->GetInstanceField(2));
-    EXPECT_STREQ(fh.GetName(), "shadow$_x_rb_ptr_");
-    fh.ChangeField(JavaLangObject->GetInstanceField(3));
-    EXPECT_STREQ(fh.GetName(), "shadow$_x_xpadding_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
   }
 
   EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
@@ -850,29 +843,21 @@
   NullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
-  FieldHelper fh(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Byte;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Character;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Double;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Float;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Integer;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Long;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Short;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
 }
 
 TEST_F(ClassLinkerTest, TwoClassLoadersOneClass) {
@@ -907,58 +892,49 @@
   EXPECT_EQ(9U, statics->NumStaticFields());
 
   mirror::ArtField* s0 = mirror::Class::FindStaticField(soa.Self(), statics, "s0", "Z");
-  FieldHelper fh(s0);
   EXPECT_STREQ(s0->GetClass()->GetDescriptor().c_str(), "Ljava/lang/reflect/ArtField;");
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimBoolean);
+  EXPECT_EQ(s0->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   EXPECT_EQ(true, s0->GetBoolean(statics.Get()));
   s0->SetBoolean<false>(statics.Get(), false);
 
   mirror::ArtField* s1 = mirror::Class::FindStaticField(soa.Self(), statics, "s1", "B");
-  fh.ChangeField(s1);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimByte);
+  EXPECT_EQ(s1->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   EXPECT_EQ(5, s1->GetByte(statics.Get()));
   s1->SetByte<false>(statics.Get(), 6);
 
   mirror::ArtField* s2 = mirror::Class::FindStaticField(soa.Self(), statics, "s2", "C");
-  fh.ChangeField(s2);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimChar);
+  EXPECT_EQ(s2->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   EXPECT_EQ('a', s2->GetChar(statics.Get()));
   s2->SetChar<false>(statics.Get(), 'b');
 
   mirror::ArtField* s3 = mirror::Class::FindStaticField(soa.Self(), statics, "s3", "S");
-  fh.ChangeField(s3);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimShort);
+  EXPECT_EQ(s3->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   EXPECT_EQ(-536, s3->GetShort(statics.Get()));
   s3->SetShort<false>(statics.Get(), -535);
 
   mirror::ArtField* s4 = mirror::Class::FindStaticField(soa.Self(), statics, "s4", "I");
-  fh.ChangeField(s4);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimInt);
+  EXPECT_EQ(s4->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   EXPECT_EQ(2000000000, s4->GetInt(statics.Get()));
   s4->SetInt<false>(statics.Get(), 2000000001);
 
   mirror::ArtField* s5 = mirror::Class::FindStaticField(soa.Self(), statics, "s5", "J");
-  fh.ChangeField(s5);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimLong);
+  EXPECT_EQ(s5->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   EXPECT_EQ(0x1234567890abcdefLL, s5->GetLong(statics.Get()));
   s5->SetLong<false>(statics.Get(), INT64_C(0x34567890abcdef12));
 
   mirror::ArtField* s6 = mirror::Class::FindStaticField(soa.Self(), statics, "s6", "F");
-  fh.ChangeField(s6);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimFloat);
+  EXPECT_EQ(s6->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   EXPECT_EQ(0.5, s6->GetFloat(statics.Get()));
   s6->SetFloat<false>(statics.Get(), 0.75);
 
   mirror::ArtField* s7 = mirror::Class::FindStaticField(soa.Self(), statics, "s7", "D");
-  fh.ChangeField(s7);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimDouble);
+  EXPECT_EQ(s7->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   EXPECT_EQ(16777217, s7->GetDouble(statics.Get()));
   s7->SetDouble<false>(statics.Get(), 16777219);
 
   mirror::ArtField* s8 = mirror::Class::FindStaticField(soa.Self(), statics, "s8",
                                                         "Ljava/lang/String;");
-  fh.ChangeField(s8);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimNot);
+  EXPECT_EQ(s8->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   EXPECT_TRUE(s8->GetObject(statics.Get())->AsString()->Equals("android"));
   s8->SetObject<false>(s8->GetDeclaringClass(),
                        mirror::String::AllocFromModifiedUtf8(soa.Self(), "robot"));
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index a3e3cfa..8de3068 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -363,7 +363,7 @@
 }
 
 void ThrowNullPointerExceptionFromDexPC(const ThrowLocation& throw_location) {
-  const DexFile::CodeItem* code = MethodHelper(throw_location.GetMethod()).GetCodeItem();
+  const DexFile::CodeItem* code = throw_location.GetMethod()->GetCodeItem();
   uint32_t throw_dex_pc = throw_location.GetDexPc();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 07cb9d1..50e9624 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -62,45 +62,98 @@
 static const size_t kMaxAllocRecordStackDepth = 16;  // Max 255.
 static const size_t kDefaultNumAllocRecords = 64*1024;  // Must be a power of 2.
 
-struct AllocRecordStackTraceElement {
-  mirror::ArtMethod* method;
-  uint32_t dex_pc;
-
-  AllocRecordStackTraceElement() : method(nullptr), dex_pc(0) {
+class AllocRecordStackTraceElement {
+ public:
+  AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {
   }
 
-  int32_t LineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return MethodHelper(method).GetLineNumFromDexPC(dex_pc);
+  int32_t LineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* method = Method();
+    DCHECK(method != nullptr);
+    return method->GetLineNumFromDexPC(DexPc());
   }
+
+  mirror::ArtMethod* Method() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(
+        Thread::Current()->DecodeJObject(method_));
+    return method;
+  }
+
+  void SetMethod(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    JNIEnv* env = soa.Env();
+    if (method_ != nullptr) {
+      env->DeleteWeakGlobalRef(method_);
+    }
+    method_ = env->NewWeakGlobalRef(soa.AddLocalReference<jobject>(m));
+  }
+
+  uint32_t DexPc() const {
+    return dex_pc_;
+  }
+
+  void SetDexPc(uint32_t pc) {
+    dex_pc_ = pc;
+  }
+
+ private:
+  jobject method_;  // This is a weak global.
+  uint32_t dex_pc_;
 };
 
-struct AllocRecord {
-  mirror::Class* type;
-  size_t byte_count;
-  uint16_t thin_lock_id;
-  AllocRecordStackTraceElement stack[kMaxAllocRecordStackDepth];  // Unused entries have NULL method.
+class AllocRecord {
+ public:
+  AllocRecord() : type_(nullptr), byte_count_(0), thin_lock_id_(0) {}
 
-  size_t GetDepth() {
+  mirror::Class* Type() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Class* type = reinterpret_cast<mirror::Class*>(
+        Thread::Current()->DecodeJObject(type_));
+    return type;
+  }
+
+  void SetType(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    JNIEnv* env = soa.Env();
+    if (type_ != nullptr) {
+      env->DeleteWeakGlobalRef(type_);
+    }
+    type_ = env->NewWeakGlobalRef(soa.AddLocalReference<jobject>(t));
+  }
+
+  size_t GetDepth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     size_t depth = 0;
-    while (depth < kMaxAllocRecordStackDepth && stack[depth].method != NULL) {
+    while (depth < kMaxAllocRecordStackDepth && stack_[depth].Method() != NULL) {
       ++depth;
     }
     return depth;
   }
 
-  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (type != nullptr) {
-      type = down_cast<mirror::Class*>(callback(type, arg));
-    }
-    for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
-      mirror::ArtMethod*& m = stack[stack_frame].method;
-      if (m == nullptr) {
-        break;
-      }
-      m = down_cast<mirror::ArtMethod*>(callback(m, arg));
-    }
+  size_t ByteCount() const {
+    return byte_count_;
   }
+
+  void SetByteCount(size_t count) {
+    byte_count_ = count;
+  }
+
+  uint16_t ThinLockId() const {
+    return thin_lock_id_;
+  }
+
+  void SetThinLockId(uint16_t id) {
+    thin_lock_id_ = id;
+  }
+
+  AllocRecordStackTraceElement* StackElement(size_t index) {
+    DCHECK_LT(index, kMaxAllocRecordStackDepth);
+    return &stack_[index];
+  }
+
+ private:
+  jobject type_;  // This is a weak global.
+  size_t byte_count_;
+  uint16_t thin_lock_id_;
+  AllocRecordStackTraceElement stack_[kMaxAllocRecordStackDepth];  // Unused entries have NULL method.
 };
 
 struct Breakpoint {
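
The rewritten AllocRecordStackTraceElement and AllocRecord no longer hold raw mirror pointers; they stash JNI weak global references instead, so a moving or sweeping collector needs no cooperation from the allocation tracker, which is why Dbg::UpdateObjectPointers() is deleted later in this change. A minimal sketch of the store/decode round trip performed by the accessors above (hypothetical free functions, not part of the patch):

    // Store: a weak global neither pins nor keeps the referent alive.
    jobject StoreWeak(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      ScopedObjectAccessUnchecked soa(Thread::Current());
      return soa.Env()->NewWeakGlobalRef(soa.AddLocalReference<jobject>(m));
    }
    // Load: DecodeJObject follows the reference and yields null once the
    // referent has been collected, matching the "unused entries" convention.
    mirror::ArtMethod* LoadWeak(jobject ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      return reinterpret_cast<mirror::ArtMethod*>(Thread::Current()->DecodeJObject(ref));
    }
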
@@ -848,21 +901,13 @@
 JDWP::JdwpError Dbg::GetOwnedMonitors(JDWP::ObjectId thread_id,
                                       std::vector<JDWP::ObjectId>& monitors,
                                       std::vector<uint32_t>& stack_depths) {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-  Thread* thread;
-  JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
-  if (error != JDWP::ERR_NONE) {
-    return error;
-  }
-  if (!IsSuspendedForDebugger(soa, thread)) {
-    return JDWP::ERR_THREAD_NOT_SUSPENDED;
-  }
-
   struct OwnedMonitorVisitor : public StackVisitor {
-    OwnedMonitorVisitor(Thread* thread, Context* context)
+    OwnedMonitorVisitor(Thread* thread, Context* context,
+                        std::vector<JDWP::ObjectId>* monitor_vector,
+                        std::vector<uint32_t>* stack_depth_vector)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, context), current_stack_depth(0) {}
+      : StackVisitor(thread, context), current_stack_depth(0),
+        monitors(monitor_vector), stack_depths(stack_depth_vector) {}
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
@@ -874,43 +919,55 @@
       return true;
     }
 
-    static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg) {
+    static void AppendOwnedMonitors(mirror::Object* owned_monitor, void* arg)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       OwnedMonitorVisitor* visitor = reinterpret_cast<OwnedMonitorVisitor*>(arg);
-      visitor->monitors.push_back(owned_monitor);
-      visitor->stack_depths.push_back(visitor->current_stack_depth);
+      visitor->monitors->push_back(gRegistry->Add(owned_monitor));
+      visitor->stack_depths->push_back(visitor->current_stack_depth);
     }
 
     size_t current_stack_depth;
-    std::vector<mirror::Object*> monitors;
-    std::vector<uint32_t> stack_depths;
+    std::vector<JDWP::ObjectId>* monitors;
+    std::vector<uint32_t>* stack_depths;
   };
-  std::unique_ptr<Context> context(Context::Create());
-  OwnedMonitorVisitor visitor(thread, context.get());
-  visitor.WalkStack();
 
-  for (size_t i = 0; i < visitor.monitors.size(); ++i) {
-    monitors.push_back(gRegistry->Add(visitor.monitors[i]));
-    stack_depths.push_back(visitor.stack_depths[i]);
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread;
+  {
+    MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+    JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
+    if (!IsSuspendedForDebugger(soa, thread)) {
+      return JDWP::ERR_THREAD_NOT_SUSPENDED;
+    }
   }
-
+  std::unique_ptr<Context> context(Context::Create());
+  OwnedMonitorVisitor visitor(thread, context.get(), &monitors, &stack_depths);
+  visitor.WalkStack();
   return JDWP::ERR_NONE;
 }
 
 JDWP::JdwpError Dbg::GetContendedMonitor(JDWP::ObjectId thread_id,
                                          JDWP::ObjectId& contended_monitor) {
+  mirror::Object* contended_monitor_obj;
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-  Thread* thread;
-  JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
-  if (error != JDWP::ERR_NONE) {
-    return error;
+  {
+    MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+    Thread* thread;
+    JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
+    if (!IsSuspendedForDebugger(soa, thread)) {
+      return JDWP::ERR_THREAD_NOT_SUSPENDED;
+    }
+    contended_monitor_obj = Monitor::GetContendedMonitor(thread);
   }
-  if (!IsSuspendedForDebugger(soa, thread)) {
-    return JDWP::ERR_THREAD_NOT_SUSPENDED;
-  }
-
-  contended_monitor = gRegistry->Add(Monitor::GetContendedMonitor(thread));
-
+  // Add() requires that thread_list_lock_ is not held, to avoid a
+  // lock-level violation.
+  contended_monitor = gRegistry->Add(contended_monitor_obj);
   return JDWP::ERR_NONE;
 }
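
GetOwnedMonitors() and GetContendedMonitor() are restructured along the same lines: thread_list_lock_ is confined to an inner scope that only decodes and validates the thread, and gRegistry->Add() runs after the lock is released, because Add() acquires lower-level locks of its own. Reduced to its essentials (names as in the code above):

    mirror::Object* obj;
    {
      MutexLock mu(soa.Self(), *Locks::thread_list_lock_);  // narrow critical section
      obj = Monitor::GetContendedMonitor(thread);
    }  // thread_list_lock_ released here
    contended_monitor = gRegistry->Add(obj);  // no lock-level violation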
 
@@ -1373,13 +1430,12 @@
 std::string Dbg::GetMethodName(JDWP::MethodId method_id)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* m = FromMethodId(method_id);
-  return MethodHelper(m).GetName();
+  return m->GetName();
 }
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtField* f = FromFieldId(field_id);
-  return FieldHelper(f).GetName();
+  return FromFieldId(field_id)->GetName();
 }
 
 /*
@@ -1402,7 +1458,7 @@
  */
 static uint16_t MangleSlot(uint16_t slot, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
@@ -1424,7 +1480,7 @@
  */
 static uint16_t DemangleSlot(uint16_t slot, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
@@ -1454,10 +1510,9 @@
 
   for (size_t i = 0; i < instance_field_count + static_field_count; ++i) {
     mirror::ArtField* f = (i < instance_field_count) ? c->GetInstanceField(i) : c->GetStaticField(i - instance_field_count);
-    FieldHelper fh(f);
     expandBufAddFieldId(pReply, ToFieldId(f));
-    expandBufAddUtf8String(pReply, fh.GetName());
-    expandBufAddUtf8String(pReply, fh.GetTypeDescriptor());
+    expandBufAddUtf8String(pReply, f->GetName());
+    expandBufAddUtf8String(pReply, f->GetTypeDescriptor());
     if (with_generic) {
       static const char genericSignature[1] = "";
       expandBufAddUtf8String(pReply, genericSignature);
@@ -1482,10 +1537,9 @@
 
   for (size_t i = 0; i < direct_method_count + virtual_method_count; ++i) {
     mirror::ArtMethod* m = (i < direct_method_count) ? c->GetDirectMethod(i) : c->GetVirtualMethod(i - direct_method_count);
-    MethodHelper mh(m);
     expandBufAddMethodId(pReply, ToMethodId(m));
-    expandBufAddUtf8String(pReply, mh.GetName());
-    expandBufAddUtf8String(pReply, mh.GetSignature().ToString());
+    expandBufAddUtf8String(pReply, m->GetName());
+    expandBufAddUtf8String(pReply, m->GetSignature().ToString());
     if (with_generic) {
       static const char genericSignature[1] = "";
       expandBufAddUtf8String(pReply, genericSignature);
@@ -1527,8 +1581,7 @@
     }
   };
   mirror::ArtMethod* m = FromMethodId(method_id);
-  MethodHelper mh(m);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   uint64_t start, end;
   if (code_item == nullptr) {
     DCHECK(m->IsNative() || m->IsProxyMethod());
@@ -1552,25 +1605,30 @@
   context.pReply = pReply;
 
   if (code_item != nullptr) {
-    mh.GetDexFile().DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                    DebugCallbackContext::Callback, NULL, &context);
+    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
+                                     DebugCallbackContext::Callback, NULL, &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + numLinesOffset, context.numItems);
 }
 
-void Dbg::OutputVariableTable(JDWP::RefTypeId, JDWP::MethodId method_id, bool with_generic, JDWP::ExpandBuf* pReply) {
+void Dbg::OutputVariableTable(JDWP::RefTypeId, JDWP::MethodId method_id, bool with_generic,
+                              JDWP::ExpandBuf* pReply) {
   struct DebugCallbackContext {
     mirror::ArtMethod* method;
     JDWP::ExpandBuf* pReply;
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature)
+    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress,
+                         const char* name, const char* descriptor, const char* signature)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
-      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, pContext->method));
+      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d",
+                                 pContext->variable_count, startAddress, endAddress - startAddress,
+                                 name, descriptor, signature, slot,
+                                 MangleSlot(slot, pContext->method));
 
       slot = MangleSlot(slot, pContext->method);
 
@@ -1587,11 +1645,10 @@
     }
   };
   mirror::ArtMethod* m = FromMethodId(method_id);
-  MethodHelper mh(m);
 
   // arg_count considers doubles and longs to take 2 units.
   // variable_count considers everything to take 1 unit.
-  std::string shorty(mh.GetShorty());
+  std::string shorty(m->GetShorty());
   expandBufAdd4BE(pReply, mirror::ArtMethod::NumArgRegisters(shorty));
 
   // We don't know the total number of variables yet, so leave a blank and update it later.
@@ -1604,10 +1661,11 @@
   context.variable_count = 0;
   context.with_generic = with_generic;
 
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item != nullptr) {
-    mh.GetDexFile().DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(), NULL,
-                                    DebugCallbackContext::Callback, &context);
+    m->GetDexFile()->DecodeDebugInfo(
+        code_item, m->IsStatic(), m->GetDexMethodIndex(), NULL, DebugCallbackContext::Callback,
+        &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + variable_count_offset, context.variable_count);
@@ -1616,14 +1674,14 @@
 void Dbg::OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
                                   JDWP::ExpandBuf* pReply) {
   mirror::ArtMethod* m = FromMethodId(method_id);
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(m->GetShorty());
   OutputJValue(tag, return_value, pReply);
 }
 
 void Dbg::OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
                            JDWP::ExpandBuf* pReply) {
   mirror::ArtField* f = FromFieldId(field_id);
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
   OutputJValue(tag, field_value, pReply);
 }
 
@@ -1634,8 +1692,7 @@
   if (m == NULL) {
     return JDWP::ERR_INVALID_METHODID;
   }
-  MethodHelper mh(m);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   size_t byte_count = code_item->insns_size_in_code_units_ * 2;
   const uint8_t* begin = reinterpret_cast<const uint8_t*>(code_item->insns_);
   const uint8_t* end = begin + byte_count;
@@ -1646,11 +1703,11 @@
 }
 
 JDWP::JdwpTag Dbg::GetFieldBasicTag(JDWP::FieldId field_id) {
-  return BasicTagFromDescriptor(FieldHelper(FromFieldId(field_id)).GetTypeDescriptor());
+  return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor());
 }
 
 JDWP::JdwpTag Dbg::GetStaticFieldBasicTag(JDWP::FieldId field_id) {
-  return BasicTagFromDescriptor(FieldHelper(FromFieldId(field_id)).GetTypeDescriptor());
+  return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor());
 }
 
 static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::ObjectId object_id,
@@ -1694,7 +1751,7 @@
     o = f->GetDeclaringClass();
   }
 
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
   JValue field_value;
   if (tag == JDWP::JT_VOID) {
     LOG(FATAL) << "Unknown tag: " << tag;
@@ -1743,7 +1800,7 @@
     o = f->GetDeclaringClass();
   }
 
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
 
   if (IsPrimitiveTag(tag)) {
     if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
@@ -1761,7 +1818,14 @@
       return JDWP::ERR_INVALID_OBJECT;
     }
     if (v != NULL) {
-      mirror::Class* field_type = FieldHelper(f).GetType();
+      mirror::Class* field_type;
+      {
+        StackHandleScope<3> hs(Thread::Current());
+        HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
+        HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+        HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
+        field_type = FieldHelper(h_f).GetType();
+      }
       if (!field_type->IsAssignableFrom(v->GetClass())) {
         return JDWP::ERR_INVALID_OBJECT;
       }
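
FieldHelper::GetType() may resolve the type, which can allocate and so trigger a moving garbage collection; every raw mirror pointer that must survive the call is therefore wrapped first. HandleWrapper writes the possibly relocated address back into the wrapped variable when the scope exits. Reduced to a single wrapper (schematic, following the hunk above):

    {
      StackHandleScope<1> hs(Thread::Current());
      HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));  // f is GC-tracked now
      field_type = FieldHelper(h_f).GetType();  // may allocate and move objects
    }  // h_f's destructor stores the updated pointer back into f
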
@@ -1838,9 +1902,12 @@
   }
   const char* old_cause = soa.Self()->StartAssertNoThreadSuspension("Debugger: GetThreadGroup");
   // Okay, so it's an object, but is it actually a thread?
-  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-  Thread* thread;
-  JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
+  JDWP::JdwpError error;
+  {
+    MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+    Thread* thread;
+    error = DecodeThread(soa, thread_id, thread);
+  }
   if (error == JDWP::ERR_THREAD_NOT_ALIVE) {
     // Zombie threads are in the null group.
     expandBufAddObjectId(pReply, JDWP::ObjectId(0));
@@ -1849,9 +1916,9 @@
     mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
     mirror::ArtField* f = c->FindInstanceField("group", "Ljava/lang/ThreadGroup;");
-    CHECK(f != NULL);
+    CHECK(f != nullptr);
     mirror::Object* group = f->GetObject(thread_object);
-    CHECK(group != NULL);
+    CHECK(group != nullptr);
     JDWP::ObjectId thread_group_id = gRegistry->Add(group);
     expandBufAddObjectId(pReply, thread_group_id);
   }
@@ -2870,18 +2937,18 @@
 
 static bool IsMethodPossiblyInlined(Thread* self, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  MethodHelper mh(m);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     // TODO We should not be asked to watch location in a native or abstract method so the code item
     // should never be null. We could just check we never encounter this case.
     return false;
   }
   StackHandleScope<2> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
-  verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
-                                    &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
+  mirror::Class* declaring_class = m->GetDeclaringClass();
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
+  verifier::MethodVerifier verifier(dex_cache->GetDexFile(), &dex_cache, &class_loader,
+                                    &m->GetClassDef(), code_item, m->GetDexMethodIndex(), m,
                                     m->GetAccessFlags(), false, true, false);
   // Note: we don't need to verify the method.
   return InlineMethodAnalyser::AnalyseMethodCode(&verifier, nullptr);
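
With MethodHelper gone, the verifier's inputs are derived directly from the method's declaring class. The equivalences relied on above, spelled out (a sketch; m is the mirror::ArtMethod* from the surrounding code):

    mirror::Class* klass = m->GetDeclaringClass();
    mirror::DexCache* dex_cache = klass->GetDexCache();     // was mh.GetDexCache()
    mirror::ClassLoader* loader = klass->GetClassLoader();  // was mh.GetClassLoader()
    const DexFile* dex_file = dex_cache->GetDexFile();      // was &mh.GetDexFile()
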
@@ -3151,11 +3218,10 @@
   single_step_control->dex_pcs.clear();
   mirror::ArtMethod* m = single_step_control->method;
   if (!m->IsNative()) {
-    MethodHelper mh(m);
-    const DexFile::CodeItem* const code_item = mh.GetCodeItem();
+    const DexFile::CodeItem* const code_item = m->GetCodeItem();
     DebugCallbackContext context(single_step_control, line_number, code_item);
-    mh.GetDexFile().DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                    DebugCallbackContext::Callback, NULL, &context);
+    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
+                                     DebugCallbackContext::Callback, NULL, &context);
   }
 
   //
@@ -3303,32 +3369,41 @@
     }
 
     // Check the argument list matches the method.
-    MethodHelper mh(m);
-    if (mh.GetShortyLength() - 1 != arg_count) {
+    uint32_t shorty_len = 0;
+    const char* shorty = m->GetShorty(&shorty_len);
+    if (shorty_len - 1 != arg_count) {
       return JDWP::ERR_ILLEGAL_ARGUMENT;
     }
-    const char* shorty = mh.GetShorty();
-    const DexFile::TypeList* types = mh.GetParameterTypeList();
-    for (size_t i = 0; i < arg_count; ++i) {
-      if (shorty[i + 1] != JdwpTagToShortyChar(arg_types[i])) {
-        return JDWP::ERR_ILLEGAL_ARGUMENT;
-      }
 
-      if (shorty[i + 1] == 'L') {
-        // Did we really get an argument of an appropriate reference type?
-        mirror::Class* parameter_type = mh.GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
-        mirror::Object* argument = gRegistry->Get<mirror::Object*>(arg_values[i]);
-        if (argument == ObjectRegistry::kInvalidObject) {
-          return JDWP::ERR_INVALID_OBJECT;
-        }
-        if (argument != NULL && !argument->InstanceOf(parameter_type)) {
+    {
+      StackHandleScope<3> hs(soa.Self());
+      MethodHelper mh(hs.NewHandle(m));
+      HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&receiver));
+      HandleWrapper<mirror::Class> h_klass(hs.NewHandleWrapper(&c));
+      const DexFile::TypeList* types = m->GetParameterTypeList();
+      for (size_t i = 0; i < arg_count; ++i) {
+        if (shorty[i + 1] != JdwpTagToShortyChar(arg_types[i])) {
           return JDWP::ERR_ILLEGAL_ARGUMENT;
         }
 
-        // Turn the on-the-wire ObjectId into a jobject.
-        jvalue& v = reinterpret_cast<jvalue&>(arg_values[i]);
-        v.l = gRegistry->GetJObject(arg_values[i]);
+        if (shorty[i + 1] == 'L') {
+          // Did we really get an argument of an appropriate reference type?
+          mirror::Class* parameter_type = mh.GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+          mirror::Object* argument = gRegistry->Get<mirror::Object*>(arg_values[i]);
+          if (argument == ObjectRegistry::kInvalidObject) {
+            return JDWP::ERR_INVALID_OBJECT;
+          }
+          if (argument != NULL && !argument->InstanceOf(parameter_type)) {
+            return JDWP::ERR_ILLEGAL_ARGUMENT;
+          }
+
+          // Turn the on-the-wire ObjectId into a jobject.
+          jvalue& v = reinterpret_cast<jvalue&>(arg_values[i]);
+          v.l = gRegistry->GetJObject(arg_values[i]);
+        }
       }
+      // Update in case it moved.
+      m = mh.GetMethod();
     }
 
     req->receiver = receiver;
@@ -3415,6 +3490,7 @@
   auto old_throw_method = hs.NewHandle<mirror::ArtMethod>(nullptr);
   auto old_exception = hs.NewHandle<mirror::Throwable>(nullptr);
   uint32_t old_throw_dex_pc;
+  bool old_exception_report_flag;
   {
     ThrowLocation old_throw_location;
     mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
@@ -3422,6 +3498,7 @@
     old_throw_method.Assign(old_throw_location.GetMethod());
     old_exception.Assign(old_exception_obj);
     old_throw_dex_pc = old_throw_location.GetDexPc();
+    old_exception_report_flag = soa.Self()->IsExceptionReportedToInstrumentation();
     soa.Self()->ClearException();
   }
 
@@ -3447,7 +3524,7 @@
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
   soa.Self()->ClearException();
   pReq->exception = gRegistry->Add(exception);
-  pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m.Get()).GetShorty());
+  pReq->result_tag = BasicTagFromDescriptor(m.Get()->GetShorty());
   if (pReq->exception != 0) {
     VLOG(jdwp) << "  JDWP invocation returning with exception=" << exception
         << " " << exception->Dump();
@@ -3476,6 +3553,7 @@
     ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
     soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
+    soa.Self()->SetExceptionReportedToInstrumentation(old_exception_report_flag);
   }
 }
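
The new old_exception_report_flag simply widens the existing save/restore bracket around the debugger-requested invoke: everything observable about the pending exception (throw location, exception object, and now the reported-to-instrumentation bit) is captured before ClearException() and reinstated afterwards. Condensed (names from the surrounding hunks):

    bool reported = soa.Self()->IsExceptionReportedToInstrumentation();  // capture
    soa.Self()->ClearException();
    // ... perform the requested invoke; it may raise and record its own exception ...
    soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
    soa.Self()->SetExceptionReportedToInstrumentation(reported);  // restore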
 
@@ -4128,8 +4206,8 @@
     }
     mirror::ArtMethod* m = GetMethod();
     if (!m->IsRuntimeMethod()) {
-      record->stack[depth].method = m;
-      record->stack[depth].dex_pc = GetDexPc();
+      record->StackElement(depth)->SetMethod(m);
+      record->StackElement(depth)->SetDexPc(GetDexPc());
       ++depth;
     }
     return true;
@@ -4138,8 +4216,8 @@
   ~AllocRecordStackVisitor() {
     // Clear out any unused stack trace elements.
     for (; depth < kMaxAllocRecordStackDepth; ++depth) {
-      record->stack[depth].method = NULL;
-      record->stack[depth].dex_pc = 0;
+      record->StackElement(depth)->SetMethod(nullptr);
+      record->StackElement(depth)->SetDexPc(0);
     }
   }
 
@@ -4163,9 +4241,9 @@
 
   // Fill in the basics.
   AllocRecord* record = &recent_allocation_records_[alloc_record_head_];
-  record->type = type;
-  record->byte_count = byte_count;
-  record->thin_lock_id = self->GetThreadId();
+  record->SetType(type);
+  record->SetByteCount(byte_count);
+  record->SetThinLockId(self->GetThreadId());
 
   // Fill in the stack trace.
   AllocRecordStackVisitor visitor(self, record);
@@ -4206,15 +4284,16 @@
   while (count--) {
     AllocRecord* record = &recent_allocation_records_[i];
 
-    LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->thin_lock_id, record->byte_count)
-              << PrettyClass(record->type);
+    LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->ThinLockId(), record->ByteCount())
+              << PrettyClass(record->Type());
 
     for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
-      mirror::ArtMethod* m = record->stack[stack_frame].method;
+      AllocRecordStackTraceElement* stack_element = record->StackElement(stack_frame);
+      mirror::ArtMethod* m = stack_element->Method();
       if (m == NULL) {
         break;
       }
-      LOG(INFO) << "    " << PrettyMethod(m) << " line " << record->stack[stack_frame].LineNumber();
+      LOG(INFO) << "    " << PrettyMethod(m) << " line " << stack_element->LineNumber();
     }
 
     // pause periodically to help logcat catch up
@@ -4226,35 +4305,6 @@
   }
 }
 
-void Dbg::UpdateObjectPointers(IsMarkedCallback* callback, void* arg) {
-  if (recent_allocation_records_ != nullptr) {
-    MutexLock mu(Thread::Current(), *alloc_tracker_lock_);
-    size_t i = HeadIndex();
-    size_t count = alloc_record_count_;
-    while (count--) {
-      AllocRecord* record = &recent_allocation_records_[i];
-      DCHECK(record != nullptr);
-      record->UpdateObjectPointers(callback, arg);
-      i = (i + 1) & (alloc_record_max_ - 1);
-    }
-  }
-  if (gRegistry != nullptr) {
-    gRegistry->UpdateObjectPointers(callback, arg);
-  }
-}
-
-void Dbg::AllowNewObjectRegistryObjects() {
-  if (gRegistry != nullptr) {
-    gRegistry->AllowNewObjects();
-  }
-}
-
-void Dbg::DisallowNewObjectRegistryObjects() {
-  if (gRegistry != nullptr) {
-    gRegistry->DisallowNewObjects();
-  }
-}
-
 class StringTable {
  public:
   StringTable() {
@@ -4291,10 +4341,10 @@
   DISALLOW_COPY_AND_ASSIGN(StringTable);
 };
 
-static const char* GetMethodSourceFile(MethodHelper* mh)
+static const char* GetMethodSourceFile(mirror::ArtMethod* method)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(mh != nullptr);
-  const char* source_file = mh->GetDeclaringClassSourceFile();
+  DCHECK(method != nullptr);
+  const char* source_file = method->GetDeclaringClassSourceFile();
   return (source_file != nullptr) ? source_file : "";
 }
 
@@ -4361,16 +4411,14 @@
     while (count--) {
       AllocRecord* record = &recent_allocation_records_[idx];
 
-      class_names.Add(record->type->GetDescriptor().c_str());
+      class_names.Add(record->Type()->GetDescriptor().c_str());
 
-      MethodHelper mh;
       for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) {
-        mirror::ArtMethod* m = record->stack[i].method;
+        mirror::ArtMethod* m = record->StackElement(i)->Method();
         if (m != NULL) {
-          mh.ChangeMethod(m);
-          class_names.Add(mh.GetDeclaringClassDescriptor());
-          method_names.Add(mh.GetName());
-          filenames.Add(GetMethodSourceFile(&mh));
+          class_names.Add(m->GetDeclaringClassDescriptor());
+          method_names.Add(m->GetName());
+          filenames.Add(GetMethodSourceFile(m));
         }
       }
 
@@ -4416,27 +4464,26 @@
       AllocRecord* record = &recent_allocation_records_[idx];
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
-          class_names.IndexOf(record->type->GetDescriptor().c_str());
-      JDWP::Append4BE(bytes, record->byte_count);
-      JDWP::Append2BE(bytes, record->thin_lock_id);
+          class_names.IndexOf(record->Type()->GetDescriptor().c_str());
+      JDWP::Append4BE(bytes, record->ByteCount());
+      JDWP::Append2BE(bytes, record->ThinLockId());
       JDWP::Append2BE(bytes, allocated_object_class_name_index);
       JDWP::Append1BE(bytes, stack_depth);
 
-      MethodHelper mh;
       for (size_t stack_frame = 0; stack_frame < stack_depth; ++stack_frame) {
         // For each stack frame:
         // (2b) method's class name
         // (2b) method name
         // (2b) method source file
         // (2b) line number, clipped to 32767; -2 if native; -1 if no source
-        mh.ChangeMethod(record->stack[stack_frame].method);
-        size_t class_name_index = class_names.IndexOf(mh.GetDeclaringClassDescriptor());
-        size_t method_name_index = method_names.IndexOf(mh.GetName());
-        size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(&mh));
+        mirror::ArtMethod* m = record->StackElement(stack_frame)->Method();
+        size_t class_name_index = class_names.IndexOf(m->GetDeclaringClassDescriptor());
+        size_t method_name_index = method_names.IndexOf(m->GetName());
+        size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(m));
         JDWP::Append2BE(bytes, class_name_index);
         JDWP::Append2BE(bytes, method_name_index);
         JDWP::Append2BE(bytes, file_name_index);
-        JDWP::Append2BE(bytes, record->stack[stack_frame].LineNumber());
+        JDWP::Append2BE(bytes, record->StackElement(stack_frame)->LineNumber());
       }
 
       idx = (idx + 1) & (alloc_record_max_ - 1);
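
For orientation, the allocation-record wire format assembled above is, per the JDWP::Append*BE calls (all fields big-endian):

    // Per record:
    //   4B byte_count | 2B thread_id | 2B class_name_index | 1B stack_depth
    // Then, stack_depth times:
    //   2B class_name_index | 2B method_name_index | 2B file_name_index | 2B line_number
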
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 31ffd6e..1cf0b0c 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -41,7 +41,7 @@
 class Object;
 class Throwable;
 }  // namespace mirror
-struct AllocRecord;
+class AllocRecord;
 class Thread;
 class ThrowLocation;
 
@@ -531,11 +531,6 @@
   static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(alloc_tracker_lock_);
   static void DumpRecentAllocations() LOCKS_EXCLUDED(alloc_tracker_lock_);
 
-  // Updates the stored direct object pointers (called from SweepSystemWeaks).
-  static void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
-      LOCKS_EXCLUDED(alloc_tracker_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   enum HpifWhen {
     HPIF_WHEN_NEVER = 0,
     HPIF_WHEN_NOW = 1,
@@ -560,9 +555,6 @@
   static void DdmSendHeapSegments(bool native)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void AllowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void DisallowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
  private:
   static void DdmBroadcast(bool connect) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStartOrStop(Thread*, uint32_t)
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1d5032d..8270a2b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -26,7 +26,6 @@
 #include "globals.h"
 #include "invoke_type.h"
 #include "jni.h"
-#include "mem_map.h"
 #include "modifiers.h"
 #include "safe_map.h"
 
@@ -41,6 +40,7 @@
   class DexCache;
 }  // namespace mirror
 class ClassLinker;
+class MemMap;
 class Signature;
 template<class T> class Handle;
 class StringPiece;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 17d1ffc..291e2d0 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -66,6 +66,64 @@
   return true;
 }
 
+const char* DexFileVerifier::CheckLoadStringByIdx(uint32_t idx, const char* error_string) {
+  if (UNLIKELY(!CheckIndex(idx, dex_file_->NumStringIds(), error_string))) {
+    return nullptr;
+  }
+  return dex_file_->StringDataByIdx(idx);
+}
+
+const char* DexFileVerifier::CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_string) {
+  if (UNLIKELY(!CheckIndex(type_idx, dex_file_->NumTypeIds(), error_string))) {
+    return nullptr;
+  }
+  const DexFile::TypeId& type_id = dex_file_->GetTypeId(type_idx);
+  uint32_t idx = type_id.descriptor_idx_;
+  return CheckLoadStringByIdx(idx, error_string);
+}
+
+const DexFile::FieldId* DexFileVerifier::CheckLoadFieldId(uint32_t idx, const char* error_string) {
+  if (UNLIKELY(!CheckIndex(idx, dex_file_->NumFieldIds(), error_string))) {
+    return nullptr;
+  }
+  return &dex_file_->GetFieldId(idx);
+}
+
+const DexFile::MethodId* DexFileVerifier::CheckLoadMethodId(uint32_t idx, const char* err_string) {
+  if (UNLIKELY(!CheckIndex(idx, dex_file_->NumMethodIds(), err_string))) {
+    return nullptr;
+  }
+  return &dex_file_->GetMethodId(idx);
+}
+
+// Helper macro to load a string and return false on error.
+#define LOAD_STRING(var, idx, error)                  \
+  const char* var = CheckLoadStringByIdx(idx, error); \
+  if (UNLIKELY(var == nullptr)) {                     \
+    return false;                                     \
+  }
+
+// Helper macro to load a string by type index and return false on error.
+#define LOAD_STRING_BY_TYPE(var, type_idx, error)              \
+  const char* var = CheckLoadStringByTypeIdx(type_idx, error); \
+  if (UNLIKELY(var == nullptr)) {                              \
+    return false;                                              \
+  }
+
+// Helper macro to load a method id; executes the last parameter (an error statement) on error.
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
+  const DexFile::MethodId* var = CheckLoadMethodId(idx, error_string);  \
+  if (UNLIKELY(var == nullptr)) {                                       \
+    error_stmt;                                                         \
+  }
+
+// Helper macro to load a field id; executes the last parameter (an error statement) on error.
+#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
+  const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
+  if (UNLIKELY(var == nullptr)) {                           \
+    error_stmt;                                             \
+  }
+
 bool DexFileVerifier::Verify(const DexFile* dex_file, const byte* begin, size_t size,
                              const char* location, std::string* error_msg) {
   std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
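
To make the control flow of these macros concrete: a use such as LOAD_STRING(descriptor, item->name_idx_, "inter_field_id_item name_idx") in the later hunks expands to roughly the following (sketch of the expansion, not patch content):

    const char* descriptor =
        CheckLoadStringByIdx(item->name_idx_, "inter_field_id_item name_idx");
    if (UNLIKELY(descriptor == nullptr)) {  // out-of-bounds index; error already reported
      return false;
    }
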
@@ -120,7 +178,8 @@
   if (UNLIKELY((range_start < file_start) || (range_start > file_end) ||
                (range_end < file_start) || (range_end > file_end))) {
     ErrorStringPrintf("Bad range for %s: %zx to %zx", label,
-                      range_start - file_start, range_end - file_start);
+                      static_cast<size_t>(range_start - file_start),
+                      static_cast<size_t>(range_end - file_start));
     return false;
   }
   return true;
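
The static_casts above cure a printf-format mismatch rather than a logic bug: subtracting two pointers yields ptrdiff_t (signed), while the "%zx" conversions in the format string require size_t. A reduced illustration (hypothetical demo function, not part of the patch):

    #include <cstdio>

    void Demo(const char* file_start, const char* range_start) {
      // std::printf("%zx\n", range_start - file_start);  // ptrdiff_t vs %zx: undefined
      std::printf("%zx\n", static_cast<size_t>(range_start - file_start));  // well-defined
    }
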
@@ -1319,41 +1378,49 @@
   return true;
 }
 
-uint16_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr) const {
+uint16_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr, bool* success) {
   ClassDataItemIterator it(*dex_file_, ptr);
+  *success = true;
 
   if (it.HasNextStaticField() || it.HasNextInstanceField()) {
-    const DexFile::FieldId& field = dex_file_->GetFieldId(it.GetMemberIndex());
-    return field.class_idx_;
+    LOAD_FIELD(field, it.GetMemberIndex(), "first_class_data_definer field_id",
+               *success = false; return DexFile::kDexNoIndex16)
+    return field->class_idx_;
   }
 
   if (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
-    const DexFile::MethodId& method = dex_file_->GetMethodId(it.GetMemberIndex());
-    return method.class_idx_;
+    LOAD_METHOD(method, it.GetMemberIndex(), "first_class_data_definer method_id",
+                *success = false; return DexFile::kDexNoIndex16)
+    return method->class_idx_;
   }
 
   return DexFile::kDexNoIndex16;
 }
 
-uint16_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const {
+uint16_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr, bool* success) {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr);
+  *success = true;
+
   if (item->fields_size_ != 0) {
     DexFile::FieldAnnotationsItem* field_items = (DexFile::FieldAnnotationsItem*) (item + 1);
-    const DexFile::FieldId& field = dex_file_->GetFieldId(field_items[0].field_idx_);
-    return field.class_idx_;
+    LOAD_FIELD(field, field_items[0].field_idx_, "first_annotations_dir_definer field_id",
+               *success = false; return DexFile::kDexNoIndex16)
+    return field->class_idx_;
   }
 
   if (item->methods_size_ != 0) {
     DexFile::MethodAnnotationsItem* method_items = (DexFile::MethodAnnotationsItem*) (item + 1);
-    const DexFile::MethodId& method = dex_file_->GetMethodId(method_items[0].method_idx_);
-    return method.class_idx_;
+    LOAD_METHOD(method, method_items[0].method_idx_, "first_annotations_dir_definer method_id",
+                *success = false; return DexFile::kDexNoIndex16)
+    return method->class_idx_;
   }
 
   if (item->parameters_size_ != 0) {
     DexFile::ParameterAnnotationsItem* parameter_items = (DexFile::ParameterAnnotationsItem*) (item + 1);
-    const DexFile::MethodId& method = dex_file_->GetMethodId(parameter_items[0].method_idx_);
-    return method.class_idx_;
+    LOAD_METHOD(method, parameter_items[0].method_idx_, "first_annotations_dir_definer method_id",
+                *success = false; return DexFile::kDexNoIndex16)
+    return method->class_idx_;
   }
 
   return DexFile::kDexNoIndex16;
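
Both FindFirst*Definer functions gain the bool* out parameter because their natural error sentinel is already taken: kDexNoIndex16 is 0xFFFF and legitimately means "no members defined". Callers must therefore distinguish the two cases explicitly, as the later hunks do:

    bool success;
    uint16_t definer = FindFirstClassDataDefiner(data, &success);
    if (!success) {
      return false;  // an index failed to load; the sentinel alone could not express this
    }
    // definer == DexFile::kDexNoIndex16 remains a valid "no defining class" answer.
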
@@ -1384,7 +1451,8 @@
 
 bool DexFileVerifier::CheckInterTypeIdItem() {
   const DexFile::TypeId* item = reinterpret_cast<const DexFile::TypeId*>(ptr_);
-  const char* descriptor = dex_file_->StringDataByIdx(item->descriptor_idx_);
+
+  LOAD_STRING(descriptor, item->descriptor_idx_, "inter_type_id_item descriptor_idx")
 
   // Check that the descriptor is a valid type.
   if (UNLIKELY(!IsValidDescriptor(descriptor))) {
@@ -1408,20 +1476,27 @@
 
 bool DexFileVerifier::CheckInterProtoIdItem() {
   const DexFile::ProtoId* item = reinterpret_cast<const DexFile::ProtoId*>(ptr_);
-  const char* shorty = dex_file_->StringDataByIdx(item->shorty_idx_);
+
+  LOAD_STRING(shorty, item->shorty_idx_, "inter_proto_id_item shorty_idx")
+
   if (item->parameters_off_ != 0 &&
       !CheckOffsetToTypeMap(item->parameters_off_, DexFile::kDexTypeTypeList)) {
     return false;
   }
 
   // Check the return type and advance the shorty.
-  if (!CheckShortyDescriptorMatch(*shorty, dex_file_->StringByTypeIdx(item->return_type_idx_), true)) {
+  LOAD_STRING_BY_TYPE(return_type, item->return_type_idx_, "inter_proto_id_item return_type_idx")
+  if (!CheckShortyDescriptorMatch(*shorty, return_type, true)) {
     return false;
   }
   shorty++;
 
   DexFileParameterIterator it(*dex_file_, *item);
   while (it.HasNext() && *shorty != '\0') {
+    if (!CheckIndex(it.GetTypeIdx(), dex_file_->NumTypeIds(),
+                    "inter_proto_id_item shorty type_idx")) {
+      return false;
+    }
     const char* descriptor = it.GetDescriptor();
     if (!CheckShortyDescriptorMatch(*shorty, descriptor, false)) {
       return false;
@@ -1476,21 +1551,21 @@
   const DexFile::FieldId* item = reinterpret_cast<const DexFile::FieldId*>(ptr_);
 
   // Check that the class descriptor is valid.
-  const char* descriptor = dex_file_->StringByTypeIdx(item->class_idx_);
-  if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) {
-    ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", descriptor);
+  LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_field_id_item class_idx")
+  if (UNLIKELY(!IsValidDescriptor(class_descriptor) || class_descriptor[0] != 'L')) {
+    ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", class_descriptor);
     return false;
   }
 
   // Check that the type descriptor is a valid field name.
-  descriptor = dex_file_->StringByTypeIdx(item->type_idx_);
-  if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] == 'V')) {
-    ErrorStringPrintf("Invalid descriptor for type_idx: '%s'", descriptor);
+  LOAD_STRING_BY_TYPE(type_descriptor, item->type_idx_, "inter_field_id_item type_idx")
+  if (UNLIKELY(!IsValidDescriptor(type_descriptor) || type_descriptor[0] == 'V')) {
+    ErrorStringPrintf("Invalid descriptor for type_idx: '%s'", type_descriptor);
     return false;
   }
 
   // Check that the name is valid.
-  descriptor = dex_file_->StringDataByIdx(item->name_idx_);
+  LOAD_STRING(descriptor, item->name_idx_, "inter_field_id_item name_idx")
   if (UNLIKELY(!IsValidMemberName(descriptor))) {
     ErrorStringPrintf("Invalid field name: '%s'", descriptor);
     return false;
@@ -1523,19 +1598,26 @@
   const DexFile::MethodId* item = reinterpret_cast<const DexFile::MethodId*>(ptr_);
 
   // Check that the class descriptor is a valid reference name.
-  const char* descriptor = dex_file_->StringByTypeIdx(item->class_idx_);
-  if (UNLIKELY(!IsValidDescriptor(descriptor) || (descriptor[0] != 'L' && descriptor[0] != '['))) {
-    ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", descriptor);
+  LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_method_id_item class_idx")
+  if (UNLIKELY(!IsValidDescriptor(class_descriptor) || (class_descriptor[0] != 'L' &&
+                                                        class_descriptor[0] != '['))) {
+    ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", class_descriptor);
     return false;
   }
 
   // Check that the name is valid.
-  descriptor = dex_file_->StringDataByIdx(item->name_idx_);
+  LOAD_STRING(descriptor, item->name_idx_, "inter_method_id_item name_idx")
   if (UNLIKELY(!IsValidMemberName(descriptor))) {
     ErrorStringPrintf("Invalid method name: '%s'", descriptor);
     return false;
   }
 
+  // Check that the proto id is valid.
+  if (UNLIKELY(!CheckIndex(item->proto_idx_, dex_file_->NumProtoIds(),
+                           "inter_method_id_item proto_idx"))) {
+    return false;
+  }
+
   // Check ordering between items. This relies on the other sections being in order.
   if (previous_item_ != NULL) {
     const DexFile::MethodId* prev_item = reinterpret_cast<const DexFile::MethodId*>(previous_item_);
@@ -1561,11 +1643,10 @@
 
 bool DexFileVerifier::CheckInterClassDefItem() {
   const DexFile::ClassDef* item = reinterpret_cast<const DexFile::ClassDef*>(ptr_);
-  uint32_t class_idx = item->class_idx_;
-  const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
 
-  if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) {
-    ErrorStringPrintf("Invalid class descriptor: '%s'", descriptor);
+  LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_class_def_item class_idx")
+  if (UNLIKELY(!IsValidDescriptor(class_descriptor) || class_descriptor[0] != 'L')) {
+    ErrorStringPrintf("Invalid class descriptor: '%s'", class_descriptor);
     return false;
   }
 
@@ -1587,9 +1668,10 @@
   }
 
   if (item->superclass_idx_ != DexFile::kDexNoIndex16) {
-    descriptor = dex_file_->StringByTypeIdx(item->superclass_idx_);
-    if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) {
-      ErrorStringPrintf("Invalid superclass: '%s'", descriptor);
+    LOAD_STRING_BY_TYPE(superclass_descriptor, item->superclass_idx_,
+                        "inter_class_def_item superclass_idx")
+    if (UNLIKELY(!IsValidDescriptor(superclass_descriptor) || superclass_descriptor[0] != 'L')) {
+      ErrorStringPrintf("Invalid superclass: '%s'", superclass_descriptor);
       return false;
     }
   }
@@ -1600,9 +1682,10 @@
 
     // Ensure that all interfaces refer to classes (not arrays or primitives).
     for (uint32_t i = 0; i < size; i++) {
-      descriptor = dex_file_->StringByTypeIdx(interfaces->GetTypeItem(i).type_idx_);
-      if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) {
-        ErrorStringPrintf("Invalid interface: '%s'", descriptor);
+      LOAD_STRING_BY_TYPE(inf_descriptor, interfaces->GetTypeItem(i).type_idx_,
+                          "inter_class_def_item interface type_idx")
+      if (UNLIKELY(!IsValidDescriptor(inf_descriptor) || inf_descriptor[0] != 'L')) {
+        ErrorStringPrintf("Invalid interface: '%s'", inf_descriptor);
         return false;
       }
     }
@@ -1626,7 +1709,11 @@
   // Check that references in class_data_item are to the right class.
   if (item->class_data_off_ != 0) {
     const byte* data = begin_ + item->class_data_off_;
-    uint16_t data_definer = FindFirstClassDataDefiner(data);
+    bool success;
+    uint16_t data_definer = FindFirstClassDataDefiner(data, &success);
+    if (!success) {
+      return false;
+    }
     if (UNLIKELY((data_definer != item->class_idx_) && (data_definer != DexFile::kDexNoIndex16))) {
       ErrorStringPrintf("Invalid class_data_item");
       return false;
@@ -1636,7 +1723,11 @@
   // Check that references in annotations_directory_item are to right class.
   if (item->annotations_off_ != 0) {
     const byte* data = begin_ + item->annotations_off_;
-    uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data);
+    bool success;
+    uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data, &success);
+    if (!success) {
+      return false;
+    }
     if (UNLIKELY((annotations_definer != item->class_idx_) &&
                  (annotations_definer != DexFile::kDexNoIndex16))) {
       ErrorStringPrintf("Invalid annotations_directory_item");
@@ -1698,11 +1789,15 @@
 
 bool DexFileVerifier::CheckInterClassDataItem() {
   ClassDataItemIterator it(*dex_file_, ptr_);
-  uint16_t defining_class = FindFirstClassDataDefiner(ptr_);
+  bool success;
+  uint16_t defining_class = FindFirstClassDataDefiner(ptr_, &success);
+  if (!success) {
+    return false;
+  }
 
   for (; it.HasNextStaticField() || it.HasNextInstanceField(); it.Next()) {
-    const DexFile::FieldId& field = dex_file_->GetFieldId(it.GetMemberIndex());
-    if (UNLIKELY(field.class_idx_ != defining_class)) {
+    LOAD_FIELD(field, it.GetMemberIndex(), "inter_class_data_item field_id", return false)
+    if (UNLIKELY(field->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for class_data_item field");
       return false;
     }
@@ -1712,8 +1807,8 @@
     if (code_off != 0 && !CheckOffsetToTypeMap(code_off, DexFile::kDexTypeCodeItem)) {
       return false;
     }
-    const DexFile::MethodId& method = dex_file_->GetMethodId(it.GetMemberIndex());
-    if (UNLIKELY(method.class_idx_ != defining_class)) {
+    LOAD_METHOD(method, it.GetMemberIndex(), "inter_class_data_item method_id", return false)
+    if (UNLIKELY(method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for class_data_item method");
       return false;
     }
@@ -1726,7 +1821,11 @@
 bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr_);
-  uint16_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_);
+  bool success;
+  uint16_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_, &success);
+  if (!success) {
+    return false;
+  }
 
   if (item->class_annotations_off_ != 0 &&
       !CheckOffsetToTypeMap(item->class_annotations_off_, DexFile::kDexTypeAnnotationSetItem)) {
@@ -1738,8 +1837,9 @@
       reinterpret_cast<const DexFile::FieldAnnotationsItem*>(item + 1);
   uint32_t field_count = item->fields_size_;
   for (uint32_t i = 0; i < field_count; i++) {
-    const DexFile::FieldId& field = dex_file_->GetFieldId(field_item->field_idx_);
-    if (UNLIKELY(field.class_idx_ != defining_class)) {
+    LOAD_FIELD(field, field_item->field_idx_, "inter_annotations_directory_item field_id",
+               return false)
+    if (UNLIKELY(field->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for field_annotation");
       return false;
     }
@@ -1754,8 +1854,9 @@
       reinterpret_cast<const DexFile::MethodAnnotationsItem*>(field_item);
   uint32_t method_count = item->methods_size_;
   for (uint32_t i = 0; i < method_count; i++) {
-    const DexFile::MethodId& method = dex_file_->GetMethodId(method_item->method_idx_);
-    if (UNLIKELY(method.class_idx_ != defining_class)) {
+    LOAD_METHOD(method, method_item->method_idx_, "inter_annotations_directory_item method_id",
+                return false)
+    if (UNLIKELY(method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for method_annotation");
       return false;
     }
@@ -1770,8 +1871,9 @@
       reinterpret_cast<const DexFile::ParameterAnnotationsItem*>(method_item);
   uint32_t parameter_count = item->parameters_size_;
   for (uint32_t i = 0; i < parameter_count; i++) {
-    const DexFile::MethodId& parameter_method = dex_file_->GetMethodId(parameter_item->method_idx_);
-    if (UNLIKELY(parameter_method.class_idx_ != defining_class)) {
+    LOAD_METHOD(parameter_method, parameter_item->method_idx_,
+                "inter_annotations_directory_item parameter method_id", return false)
+    if (UNLIKELY(parameter_method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for parameter_annotation");
       return false;
     }
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 3337785..f845993 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -71,8 +71,11 @@
   bool CheckIntraSection();
 
   bool CheckOffsetToTypeMap(size_t offset, uint16_t type);
-  uint16_t FindFirstClassDataDefiner(const byte* ptr) const;
-  uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const;
+
+  // Note: since kDexNoIndex16 (0xFFFF) is itself a valid return value, an additional out
+  // parameter is needed to signal any errors encountered while loading an index.
+  uint16_t FindFirstClassDataDefiner(const byte* ptr, bool* success);
+  uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr, bool* success);
 
   bool CheckInterStringIdItem();
   bool CheckInterTypeIdItem();
@@ -88,6 +91,16 @@
   bool CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type);
   bool CheckInterSection();
 
+  // Load a string by string index (or, in the TypeIdx variant, by type index). Checks whether
+  // the index is in bounds, printing an error and returning nullptr if not.
+  const char* CheckLoadStringByIdx(uint32_t idx, const char* error_fmt);
+  const char* CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_fmt);
+
+  // Load a field/method Id by index. Checks whether the index is in bounds, printing an error
+  // and returning nullptr if not.
+  const DexFile::FieldId* CheckLoadFieldId(uint32_t idx, const char* error_fmt);
+  const DexFile::MethodId* CheckLoadMethodId(uint32_t idx, const char* error_fmt);
+
   void ErrorStringPrintf(const char* fmt, ...)
       __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR;
 
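Caller-side, the out parameter keeps the failure path distinct from a legitimate kDexNoIndex16 result; the annotations-directory check above follows exactly this pattern:

    bool success;
    uint16_t defining_class = FindFirstClassDataDefiner(ptr_, &success);
    if (!success) {
      return false;  // An id lookup failed; the error message is already set.
    }
    // Otherwise defining_class may legitimately be kDexNoIndex16 (0xFFFF), e.g. for
    // an empty item, which is why failure cannot be signaled via the return value.
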
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
new file mode 100644
index 0000000..d0ce00f
--- /dev/null
+++ b/runtime/dex_file_verifier_test.cc
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file_verifier.h"
+
+#include <memory>
+#include "zlib.h"
+
+#include "common_runtime_test.h"
+#include "base/macros.h"
+
+namespace art {
+
+class DexFileVerifierTest : public CommonRuntimeTest {};
+
+static const byte kBase64Map[256] = {
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,
+  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255,
+  255, 254, 255, 255, 255,   0,   1,   2,   3,   4,   5,   6,
+    7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  // NOLINT
+   19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,  // NOLINT
+  255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,
+   37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  // NOLINT
+   49,  50,  51, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // NOLINT
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255
+};
+
+static inline byte* DecodeBase64(const char* src, size_t* dst_size) {
+  std::vector<byte> tmp;
+  uint32_t t = 0, y = 0;
+  int g = 3;
+  for (size_t i = 0; src[i] != '\0'; ++i) {
+    byte c = kBase64Map[src[i] & 0xFF];
+    if (c == 255) continue;
+    // The final '=' symbols are read and used to trim the remaining bytes.
+    if (c == 254) {
+      c = 0;
+      // Prevent g < 0, which would potentially allow an overflow later.
+      if (--g < 0) {
+        *dst_size = 0;
+        return nullptr;
+      }
+    } else if (g != 3) {
+      // We only allow '=' at the end.
+      *dst_size = 0;
+      return nullptr;
+    }
+    t = (t << 6) | c;
+    if (++y == 4) {
+      tmp.push_back((t >> 16) & 255);
+      if (g > 1) {
+        tmp.push_back((t >> 8) & 255);
+      }
+      if (g > 2) {
+        tmp.push_back(t & 255);
+      }
+      y = t = 0;
+    }
+  }
+  if (y != 0) {
+    *dst_size = 0;
+    return nullptr;
+  }
+  std::unique_ptr<byte[]> dst(new byte[tmp.size()]);
+  // dst_size is dereferenced unconditionally above, so it must be non-null here; report the
+  // decoded size directly.
+  *dst_size = tmp.size();
+  std::copy(tmp.begin(), tmp.end(), dst.get());
+  return dst.release();
+}
+
+static const DexFile* OpenDexFileBase64(const char* base64, const char* location,
+                                        std::string* error_msg) {
+  // Decode base64.
+  CHECK(base64 != NULL);
+  size_t length;
+  std::unique_ptr<byte[]> dex_bytes(DecodeBase64(base64, &length));
+  CHECK(dex_bytes.get() != NULL);
+
+  // Write to provided file.
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
+  CHECK(file.get() != NULL);
+  if (!file->WriteFully(dex_bytes.get(), length)) {
+    PLOG(FATAL) << "Failed to write base64 as dex file";
+  }
+  file.reset();
+
+  // Read dex file.
+  ScopedObjectAccess soa(Thread::Current());
+  return DexFile::Open(location, location, error_msg);
+}
+
+// For reference.
+static const char kGoodTestDex[] =
+    "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN"
+    "AAAAcAAAAAYAAACkAAAAAgAAALwAAAABAAAA1AAAAAQAAADcAAAAAQAAAPwAAACIAQAAHAEAAFoB"
+    "AABiAQAAagEAAIEBAACVAQAAqQEAAL0BAADDAQAAzgEAANEBAADVAQAA2gEAAN8BAAABAAAAAgAA"
+    "AAMAAAAEAAAABQAAAAgAAAAIAAAABQAAAAAAAAAJAAAABQAAAFQBAAAEAAEACwAAAAAAAAAAAAAA"
+    "AAAAAAoAAAABAAEADAAAAAIAAAAAAAAAAAAAAAEAAAACAAAAAAAAAAcAAAAAAAAA8wEAAAAAAAAB"
+    "AAEAAQAAAOgBAAAEAAAAcBADAAAADgACAAAAAgAAAO0BAAAIAAAAYgAAABoBBgBuIAIAEAAOAAEA"
+    "AAADAAY8aW5pdD4ABkxUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABJMamF2YS9sYW5nL09i"
+    "amVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwASTGphdmEvbGFuZy9TeXN0ZW07AARUZXN0AAlUZXN0"
+    "LmphdmEAAVYAAlZMAANmb28AA291dAAHcHJpbnRsbgABAAcOAAMABw54AAAAAgAAgYAEnAIBCbQC"
+    "AAAADQAAAAAAAAABAAAAAAAAAAEAAAANAAAAcAAAAAIAAAAGAAAApAAAAAMAAAACAAAAvAAAAAQA"
+    "AAABAAAA1AAAAAUAAAAEAAAA3AAAAAYAAAABAAAA/AAAAAEgAAACAAAAHAEAAAEQAAABAAAAVAEA"
+    "AAIgAAANAAAAWgEAAAMgAAACAAAA6AEAAAAgAAABAAAA8wEAAAAQAAABAAAABAIAAA==";
+
+TEST_F(DexFileVerifierTest, GoodDex) {
+  ScratchFile tmp;
+  std::string error_msg;
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kGoodTestDex, tmp.GetFilename().c_str(),
+                                                       &error_msg));
+  ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+}
+
+static void FixUpChecksum(byte* dex_file) {
+  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(dex_file);
+  uint32_t expected_size = header->file_size_;
+  uint32_t adler_checksum = adler32(0L, Z_NULL, 0);
+  const uint32_t non_sum = sizeof(DexFile::Header::magic_) + sizeof(DexFile::Header::checksum_);
+  const byte* non_sum_ptr = dex_file + non_sum;
+  adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
+  header->checksum_ = adler_checksum;
+}
+
+static const DexFile* FixChecksumAndOpen(byte* bytes, size_t length, const char* location,
+                                         std::string* error_msg) {
+  // Check data.
+  CHECK(bytes != nullptr);
+
+  // Fixup of checksum.
+  FixUpChecksum(bytes);
+
+  // Write to provided file.
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
+  CHECK(file.get() != NULL);
+  if (!file->WriteFully(bytes, length)) {
+    PLOG(FATAL) << "Failed to write modified dex file";
+  }
+  file.reset();
+
+  // Read dex file.
+  ScopedObjectAccess soa(Thread::Current());
+  return DexFile::Open(location, location, error_msg);
+}
+
+static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val,
+                          std::string* error_msg) {
+  // Decode base64.
+  size_t length;
+  std::unique_ptr<byte[]> dex_bytes(DecodeBase64(kGoodTestDex, &length));
+  CHECK(dex_bytes.get() != NULL);
+
+  // Make modifications.
+  dex_bytes.get()[offset] = new_val;
+
+  // Fixup and load.
+  std::unique_ptr<const DexFile> file(FixChecksumAndOpen(dex_bytes.get(), length, location,
+                                                         error_msg));
+  return file.get() != nullptr;
+}
+
+TEST_F(DexFileVerifierTest, MethodId) {
+  {
+    // Class error.
+    ScratchFile tmp;
+    std::string error_msg;
+    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
+    ASSERT_TRUE(success);
+    ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg;
+  }
+
+  {
+    // Proto error.
+    ScratchFile tmp;
+    std::string error_msg;
+    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
+    ASSERT_TRUE(success);
+    ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg;
+  }
+
+  {
+    // Name error.
+    ScratchFile tmp;
+    std::string error_msg;
+    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
+    ASSERT_TRUE(success);
+    ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg;
+  }
+}
+
+}  // namespace art
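For reference, the checksum rule that FixUpChecksum applies: a dex checksum is adler32 over everything past the magic and checksum header fields (8 and 4 bytes respectively in the dex format). A standalone restatement of the same computation, assuming only zlib:

    #include <stddef.h>
    #include <stdint.h>
    #include <zlib.h>

    // Recompute a dex file's checksum: adler32 over all bytes after magic_
    // (8 bytes) and checksum_ (4 bytes), matching FixUpChecksum above.
    uint32_t ComputeDexChecksum(const uint8_t* data, size_t size) {
      const size_t skip = 8 + 4;
      uint32_t sum = adler32(0L, Z_NULL, 0);
      return adler32(sum, data + skip, size - skip);
    }
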
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 0494f22..0a71d62 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -20,6 +20,7 @@
 
 #include <iomanip>
 
+#include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "utils.h"
 
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 1ff5c19..edba502 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -145,27 +145,28 @@
   };
 
   enum VerifyFlag {
-    kVerifyNone            = 0x00000,
-    kVerifyRegA            = 0x00001,
-    kVerifyRegAWide        = 0x00002,
-    kVerifyRegB            = 0x00004,
-    kVerifyRegBField       = 0x00008,
-    kVerifyRegBMethod      = 0x00010,
-    kVerifyRegBNewInstance = 0x00020,
-    kVerifyRegBString      = 0x00040,
-    kVerifyRegBType        = 0x00080,
-    kVerifyRegBWide        = 0x00100,
-    kVerifyRegC            = 0x00200,
-    kVerifyRegCField       = 0x00400,
-    kVerifyRegCNewArray    = 0x00800,
-    kVerifyRegCType        = 0x01000,
-    kVerifyRegCWide        = 0x02000,
-    kVerifyArrayData       = 0x04000,
-    kVerifyBranchTarget    = 0x08000,
-    kVerifySwitchTargets   = 0x10000,
-    kVerifyVarArg          = 0x20000,
-    kVerifyVarArgRange     = 0x40000,
-    kVerifyError           = 0x80000,
+    kVerifyNone            = 0x000000,
+    kVerifyRegA            = 0x000001,
+    kVerifyRegAWide        = 0x000002,
+    kVerifyRegB            = 0x000004,
+    kVerifyRegBField       = 0x000008,
+    kVerifyRegBMethod      = 0x000010,
+    kVerifyRegBNewInstance = 0x000020,
+    kVerifyRegBString      = 0x000040,
+    kVerifyRegBType        = 0x000080,
+    kVerifyRegBWide        = 0x000100,
+    kVerifyRegC            = 0x000200,
+    kVerifyRegCField       = 0x000400,
+    kVerifyRegCNewArray    = 0x000800,
+    kVerifyRegCType        = 0x001000,
+    kVerifyRegCWide        = 0x002000,
+    kVerifyArrayData       = 0x004000,
+    kVerifyBranchTarget    = 0x008000,
+    kVerifySwitchTargets   = 0x010000,
+    kVerifyVarArg          = 0x020000,
+    kVerifyVarArgRange     = 0x040000,
+    kVerifyRuntimeOnly     = 0x080000,
+    kVerifyError           = 0x100000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
@@ -493,18 +494,23 @@
   }
 
   int GetVerifyTypeArgumentB() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegB | kVerifyRegBField | kVerifyRegBMethod |
-             kVerifyRegBNewInstance | kVerifyRegBString | kVerifyRegBType | kVerifyRegBWide));
+    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegB | kVerifyRegBField |
+        kVerifyRegBMethod | kVerifyRegBNewInstance | kVerifyRegBString | kVerifyRegBType |
+        kVerifyRegBWide));
   }
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-             kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyExtraFlags() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
-             kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError));
+        kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError));
+  }
+
+  bool GetVerifyIsRuntimeOnly() const {
+    return (kInstructionVerifyFlags[Opcode()] & kVerifyRuntimeOnly) != 0;
   }
 
   // Get the dex PC of this instruction as a offset in code units from the beginning of insns.
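The intended consumer of the new kVerifyRuntimeOnly bit is GetVerifyIsRuntimeOnly() above: quickened opcodes must only pass verification when the verifier is looking at already-quickened runtime code. A hedged sketch of such a check; allow_runtime_only_instructions is an assumed flag, not part of this change:

    if (inst->GetVerifyIsRuntimeOnly() && !allow_runtime_only_instructions) {
      // iget-quick, invoke-virtual-quick, etc. are only valid after the
      // quickening pass; reject them when verifying a plain dex file.
      return false;
    }
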
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index f43e42f..4cda58b 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -245,14 +245,14 @@
   V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
   V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
   V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange | kVerifyRuntimeOnly) \
   V(0xEB, UNUSED_EB, "unused-eb", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xEC, UNUSED_EC, "unused-ec", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xED, UNUSED_ED, "unused-ed", k10x, false, kUnknown, 0, kVerifyError) \
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 5d20096..0df8211 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -20,6 +20,7 @@
 #include <unistd.h>
 
 #include "base/logging.h"
+#include "base/stringprintf.h"
 #include "base/stl_util.h"
 #include "utils.h"
 #include "instruction_set.h"
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index f3ec713..ce8587b 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -17,9 +17,10 @@
 #ifndef ART_RUNTIME_ELF_UTILS_H_
 #define ART_RUNTIME_ELF_UTILS_H_
 
-// Include the micro-API to avoid potential macro conflicts with the
-// compiler's own elf.h file.
-#include "../../bionic/libc/kernel/uapi/linux/elf.h"
+#include <sys/cdefs.h>
+
+// Explicitly include elf.h from elfutils to avoid Linux and other dependencies.
+#include "../../external/elfutils/0.153/libelf/elf.h"
 
 // Architecture dependent flags for the ELF header.
 #define EF_ARM_EABI_VER5 0x05000000
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 320273d..a0e32f5 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -189,18 +189,21 @@
       // Do nothing.
       return zero;
     } else {
+      StackHandleScope<1> hs(soa.Self());
+      MethodHelper mh_interface_method(
+          hs.NewHandle(soa.Decode<mirror::ArtMethod*>(interface_method_jobj)));
+      // This can cause thread suspension.
+      mirror::Class* result_type = mh_interface_method.GetReturnType();
       mirror::Object* result_ref = soa.Decode<mirror::Object*>(result);
       mirror::Object* rcvr = soa.Decode<mirror::Object*>(rcvr_jobj);
-      mirror::ArtMethod* interface_method =
-          soa.Decode<mirror::ArtMethod*>(interface_method_jobj);
-      mirror::Class* result_type = MethodHelper(interface_method).GetReturnType();
       mirror::ArtMethod* proxy_method;
-      if (interface_method->GetDeclaringClass()->IsInterface()) {
-        proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(interface_method);
+      if (mh_interface_method.GetMethod()->GetDeclaringClass()->IsInterface()) {
+        proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(
+            mh_interface_method.GetMethod());
       } else {
         // Proxy dispatch to a method defined in Object.
-        DCHECK(interface_method->GetDeclaringClass()->IsObjectClass());
-        proxy_method = interface_method;
+        DCHECK(mh_interface_method.GetMethod()->GetDeclaringClass()->IsObjectClass());
+        proxy_method = mh_interface_method.GetMethod();
       }
       ThrowLocation throw_location(rcvr, proxy_method, -1);
       JValue result_unboxed;
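The reordering above is deliberate: GetReturnType() can suspend the thread, so the interface method must live in a handle before the call, and the raw decodes of result and rcvr have to happen afterwards. The pattern in isolation, a sketch assuming ART's StackHandleScope machinery:

    // Wrap the raw pointer in a handle *before* any call that may suspend, so a
    // moving GC updates the handle instead of leaving a stale mirror pointer.
    StackHandleScope<1> hs(soa.Self());
    MethodHelper mh(hs.NewHandle(soa.Decode<mirror::ArtMethod*>(interface_method_jobj)));
    mirror::Class* return_type = mh.GetReturnType();  // May suspend the thread.
    // Only decode raw object pointers once no further suspension can occur.
    mirror::Object* result_ref = soa.Decode<mirror::Object*>(result);
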
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 58b4286..3d8b29f 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -327,8 +327,8 @@
       ThrowIllegalAccessErrorFinalField(referrer, resolved_field);
       return nullptr;  // Failure.
     } else {
-      FieldHelper fh(resolved_field);
-      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive || fh.FieldSize() != expected_size)) {
+      if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+                   resolved_field->FieldSize() != expected_size)) {
         ThrowLocation throw_location = self->GetCurrentLocationForThrow();
         DCHECK(throw_location.GetMethod() == referrer);
         self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
@@ -433,9 +433,8 @@
       if (access_check &&
           (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
         // Behavior to agree with that of the verifier.
-        MethodHelper mh(resolved_method);
-        ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(), mh.GetName(),
-                               mh.GetSignature());
+        ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                               resolved_method->GetName(), resolved_method->GetSignature());
         return nullptr;  // Failure.
       }
       DCHECK(vtable != nullptr);
@@ -450,9 +449,8 @@
         vtable = (super_class != nullptr) ? super_class->GetVTable() : nullptr;
         if (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength())) {
           // Behavior to agree with that of the verifier.
-          MethodHelper mh(resolved_method);
-          ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(), mh.GetName(),
-                                 mh.GetSignature());
+          ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                                 resolved_method->GetName(), resolved_method->GetSignature());
           return nullptr;  // Failure.
         }
       } else {
@@ -553,9 +551,8 @@
     // Illegal access.
     return NULL;
   }
-  FieldHelper fh(resolved_field);
-  if (UNLIKELY(fh.IsPrimitiveType() != is_primitive ||
-               fh.FieldSize() != expected_size)) {
+  if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+               resolved_field->FieldSize() != expected_size)) {
     return NULL;
   }
   return resolved_field;
@@ -655,6 +652,7 @@
   // Save any pending exception over monitor exit call.
   mirror::Throwable* saved_exception = NULL;
   ThrowLocation saved_throw_location;
+  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
   if (UNLIKELY(self->IsExceptionPending())) {
     saved_exception = self->GetException(&saved_throw_location);
     self->ClearException();
@@ -670,6 +668,7 @@
   // Restore pending exception.
   if (saved_exception != NULL) {
     self->SetException(saved_throw_location, saved_exception);
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
 }
 
@@ -683,11 +682,13 @@
     JniAbortF(NULL, "invalid reference returned from %s", PrettyMethod(m).c_str());
   }
   // Make sure that the result is an instance of the type this method was expected to return.
-  mirror::Class* return_type = MethodHelper(m).GetReturnType();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
+  mirror::Class* return_type = MethodHelper(h_m).GetReturnType();
 
   if (!o->InstanceOf(return_type)) {
-    JniAbortF(NULL, "attempt to return an instance of %s from %s",
-              PrettyTypeOf(o).c_str(), PrettyMethod(m).c_str());
+    JniAbortF(NULL, "attempt to return an instance of %s from %s", PrettyTypeOf(o).c_str(),
+              PrettyMethod(h_m.Get()).c_str());
   }
 }
 
diff --git a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
index 1005d0e..335a617 100644
--- a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
@@ -26,7 +26,7 @@
                                                        mirror::Array* array,
                                                        uint32_t payload_offset)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = MethodHelper(method).GetCodeItem();
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
   const Instruction::ArrayDataPayload* payload =
       reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item->insns_ + payload_offset);
   DCHECK_EQ(payload->ident, static_cast<uint16_t>(Instruction::kArrayDataSignature));
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index 3a898e8..eb50ec3 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -41,9 +41,8 @@
 
   // When we return, the caller will branch to this address, so it had better not be 0!
   if (UNLIKELY(code == NULL)) {
-      MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
-                 << " location: " << mh.GetDexFile().GetLocation();
+                 << " location: " << method->GetDexFile()->GetLocation();
   }
   return method;
 }
diff --git a/runtime/entrypoints/portable/portable_throw_entrypoints.cc b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
index 1fdb832..189e6b5 100644
--- a/runtime/entrypoints/portable/portable_throw_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
@@ -79,8 +79,9 @@
     return -1;
   }
   mirror::Class* exception_type = exception->GetClass();
-  MethodHelper mh(current_method);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  StackHandleScope<1> hs(self);
+  MethodHelper mh(hs.NewHandle(current_method));
+  const DexFile::CodeItem* code_item = current_method->GetCodeItem();
   DCHECK_LT(ti_offset, code_item->tries_size_);
   const DexFile::TryItem* try_item = DexFile::GetTryItems(*code_item, ti_offset);
 
@@ -102,7 +103,7 @@
       // TODO: check, the verifier (class linker?) should take care of resolving all exception
       //       classes early.
       LOG(WARNING) << "Unresolved exception class when finding catch block: "
-          << mh.GetTypeDescriptorFromTypeIdx(iter_type_idx);
+          << current_method->GetTypeDescriptorFromTypeIdx(iter_type_idx);
     } else if (iter_exception_type->IsAssignableFrom(exception_type)) {
       catch_dex_pc = it.GetHandlerAddress();
       result = iter_index;
@@ -112,13 +113,11 @@
   }
   if (result != -1) {
     // Handler found.
-    Runtime::Current()->GetInstrumentation()->ExceptionCaughtEvent(self,
-                                                                   throw_location,
-                                                                   current_method,
-                                                                   catch_dex_pc,
-                                                                   exception);
+    Runtime::Current()->GetInstrumentation()->ExceptionCaughtEvent(
+        self, throw_location, current_method, catch_dex_pc, exception);
     // If the catch block has no move-exception then clear the exception for it.
-    const Instruction* first_catch_instr = Instruction::At(&mh.GetCodeItem()->insns_[catch_dex_pc]);
+    const Instruction* first_catch_instr = Instruction::At(
+        &current_method->GetCodeItem()->insns_[catch_dex_pc]);
     if (first_catch_instr->Opcode() != Instruction::MOVE_EXCEPTION) {
       self->ClearException();
     }
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 3756f47..6825e78 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -196,8 +196,9 @@
     return 0;
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension("Building interpreter shadow frame");
-    MethodHelper mh(method);
-    const DexFile::CodeItem* code_item = mh.GetCodeItem();
+    StackHandleScope<2> hs(self);
+    MethodHelper mh(hs.NewHandle(method));
+    const DexFile::CodeItem* code_item = method->GetCodeItem();
     uint16_t num_regs = code_item->registers_size_;
     void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
     ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, NULL,  // No last shadow coming from quick.
@@ -214,7 +215,6 @@
 
     if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
       // Ensure static method's class is initialized.
-      StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
         DCHECK(Thread::Current()->IsExceptionPending());
@@ -294,7 +294,8 @@
   jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
 
   // Placing arguments into args vector and remove the receiver.
-  MethodHelper proxy_mh(proxy_method);
+  StackHandleScope<1> hs(self);
+  MethodHelper proxy_mh(hs.NewHandle(proxy_method));
   std::vector<jvalue> args;
   BuildPortableArgumentVisitor local_ref_visitor(proxy_mh, sp, soa, args);
   local_ref_visitor.VisitArguments();
@@ -327,7 +328,7 @@
   InvokeType invoke_type;
   bool is_range;
   if (called->IsRuntimeMethod()) {
-    const DexFile::CodeItem* code = MethodHelper(caller).GetCodeItem();
+    const DexFile::CodeItem* code = caller->GetCodeItem();
     CHECK_LT(dex_pc, code->insns_size_in_code_units_);
     const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
     Instruction::Code instr_code = instr->Opcode();
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index b582abb..e573d6d 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,8 +18,17 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "base/mutex.h"
+#include "instruction_set.h"
 #include "thread-inl.h"
 
+// Specific frame size code is in architecture-specific files. We include this to compile-time
+// specialize the code.
+#include "arch/arm/quick_method_frame_info_arm.h"
+#include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "arch/mips/quick_method_frame_info_mips.h"
+#include "arch/x86/quick_method_frame_info_x86.h"
+#include "arch/x86_64/quick_method_frame_info_x86_64.h"
+
 namespace art {
 namespace mirror {
 class ArtMethod;
@@ -36,6 +45,34 @@
   self->VerifyStack();
 }
 
+static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, Runtime::CalleeSaveType type) {
+  // In C++11, a constexpr function body must be a single return statement.
+  return (isa == kArm || isa == kThumb2) ? arm::ArmCalleeSaveFrameSize(type) :
+         isa == kArm64 ? arm64::Arm64CalleeSaveFrameSize(type) :
+         isa == kMips ? mips::MipsCalleeSaveFrameSize(type) :
+         isa == kX86 ? x86::X86CalleeSaveFrameSize(type) :
+         isa == kX86_64 ? x86_64::X86_64CalleeSaveFrameSize(type) :
+         isa == kNone ? (LOG(FATAL) << "kNone has no frame size", 0) :
+         (LOG(FATAL) << "Unknown instruction set " << isa, 0);
+}
+
+// Note: this constexpr specialization is sanity-checked in the quick-trampoline gtest.
+static constexpr size_t GetConstExprPointerSize(InstructionSet isa) {
+  // In C++11, a constexpr function body must be a single return statement.
+  return (isa == kArm || isa == kThumb2) ? kArmPointerSize :
+         isa == kArm64 ? kArm64PointerSize :
+         isa == kMips ? kMipsPointerSize :
+         isa == kX86 ? kX86PointerSize :
+         isa == kX86_64 ? kX86_64PointerSize :
+         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", 0) :
+         (LOG(FATAL) << "Unknown instruction set " << isa, 0);
+}
+
+// Note: this constexpr specialization is sanity-checked in the quick-trampoline gtest.
+static constexpr size_t GetCalleeSavePCOffset(InstructionSet isa, Runtime::CalleeSaveType type) {
+  return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
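Because these helpers are constexpr, per-architecture expectations can also be pinned down at compile time, complementing the gtest mentioned above. A hypothetical example:

    // Hypothetical compile-time check: the saved PC must lie inside the frame.
    static_assert(GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly) <
                      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsOnly),
                  "Callee-save PC offset outside the callee-save frame");
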
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 7bd1582..469d373 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -115,7 +115,6 @@
 
   // Intrinsics
   int32_t (*pIndexOf)(void*, uint32_t, uint32_t, uint32_t);
-  int32_t (*pMemcmp16)(void*, void*, int32_t);
   int32_t (*pStringCompareTo)(void*, void*);
   void* (*pMemcpy)(void*, const void*, size_t);
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 844367d..5cb0f36 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -197,7 +197,7 @@
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
-    if (LIKELY(!FieldHelper(field).IsPrimitiveType())) {
+    if (LIKELY(!field->IsPrimitiveType())) {
       // Compiled code can't use transactional mode.
       field->SetObj<false>(field->GetDeclaringClass(), new_value);
       return 0;  // success
@@ -226,8 +226,12 @@
     return 0;  // success
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                          sizeof(int32_t));
+  {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
+                                                            sizeof(int32_t));
+  }
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -244,10 +248,7 @@
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
                                         Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Runtime* runtime = Runtime::Current();
-  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  uint32_t frame_size =
-      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
+  constexpr size_t frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsOnly);
   mirror::ArtMethod* referrer =
       reinterpret_cast<StackReference<mirror::ArtMethod>*>(
           reinterpret_cast<uint8_t*>(sp) + frame_size)->AsMirrorPtr();
@@ -258,7 +259,7 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  sp->Assign(callee_save);
+  sp->Assign(Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly));
   self->SetTopOfStack(sp, 0);
   field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                           sizeof(int64_t));
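The new block scope introduced above in artSet32InstanceFromCode exists because FindFieldFromCode can suspend: HandleWrapper registers &obj as a GC root and writes the (possibly relocated) object back when the scope ends. Schematically, with the intent spelled out in comments:

    {
      StackHandleScope<1> hs(self);
      // Register &obj as a root for the duration of the scope.
      HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
      field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                              sizeof(int32_t));  // May suspend.
    }  // h_obj's destructor stores the current object back into obj.
    // obj is safe to test against NULL here even if the GC moved it.
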
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 6ef075d..d161d0b 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "callee_save_frame.h"
+#include "instruction_set.h"
 #include "instrumentation.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
@@ -40,9 +41,10 @@
   return result;
 }
 
-extern "C" uint64_t artInstrumentationMethodExitFromCode(Thread* self,
-                                                         StackReference<mirror::ArtMethod>* sp,
-                                                         uint64_t gpr_result, uint64_t fpr_result)
+extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self,
+                                                              StackReference<mirror::ArtMethod>* sp,
+                                                              uint64_t gpr_result,
+                                                              uint64_t fpr_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // TODO: use FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly) not the hand inlined below.
   //       We use the hand inline version to ensure the return_pc is assigned before verifying the
@@ -50,19 +52,16 @@
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
   Runtime* runtime = Runtime::Current();
-  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  sp->Assign(callee_save);
-  uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
-      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
+  sp->Assign(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly));
+  uint32_t return_pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  uint64_t return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(self, return_pc,
-                                                                                   gpr_result,
-                                                                                   fpr_result);
+  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
+      self, return_pc, gpr_result, fpr_result);
   self->VerifyStack();
   return return_or_deoptimize_pc;
 }
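TwoWordReturn replaces the ad-hoc MethodAndCode type that is removed from the trampoline file below. On 32-bit targets it presumably packs both words into a uint64_t returned in a register pair; a hedged sketch of that packing (the real GetTwoWordSuccessValue/GetTwoWordFailureValue helpers live in instruction_set.h, included above):

    // Assumed 32-bit packing, mirroring the removed MethodAndCode encoding:
    // hi word = code pointer, lo word = method pointer; method == 0 is failure.
    static inline uint64_t TwoWordSuccess(uintptr_t code, uintptr_t method) {
      return (static_cast<uint64_t>(code) << 32) | method;
    }
    static inline uint64_t TwoWordFailure() {
      return 0;  // The assembly stubs test the method word against 0.
    }
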
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1d524cb..514d1aa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -20,6 +20,7 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "gc/accounting/card_table-inl.h"
+#include "instruction_set.h"
 #include "interpreter/interpreter.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -36,6 +37,9 @@
 class QuickArgumentVisitor {
   // Number of bytes for each out register in the caller method's frame.
   static constexpr size_t kBytesStackArgLocation = 4;
+  // Frame size in bytes of a callee-save frame for RefsAndArgs.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
+      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -58,7 +62,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 48;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -86,10 +89,9 @@
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =16;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 304;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -114,7 +116,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 60;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 64;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -139,7 +140,6 @@
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 32;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -172,12 +172,11 @@
   // | Padding         |
   // | RDI/Method*     |  <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
-  static constexpr size_t kNumQuickGprArgs = 5;  // 3 arguments passed in GPRs.
-  static constexpr size_t kNumQuickFprArgs = 8;  // 0 arguments passed in FPRs.
+  static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
+  static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168;  // Offset of return address.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 176;  // Frame size.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     switch (gpr_index) {
       case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA));
@@ -219,10 +218,7 @@
       stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
                   + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
       gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
-      is_split_long_or_double_(false) {
-    DCHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize,
-              Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-  }
+      is_split_long_or_double_(false) { }
 
   virtual ~QuickArgumentVisitor() {}
 
@@ -470,16 +466,16 @@
   } else {
     DCHECK(!method->IsNative()) << PrettyMethod(method);
     const char* old_cause = self->StartAssertNoThreadSuspension("Building interpreter shadow frame");
-    MethodHelper mh(method);
-    const DexFile::CodeItem* code_item = mh.GetCodeItem();
+    const DexFile::CodeItem* code_item = method->GetCodeItem();
     DCHECK(code_item != nullptr) << PrettyMethod(method);
     uint16_t num_regs = code_item->registers_size_;
     void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
     ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, NULL,  // No last shadow coming from quick.
                                                   method, 0, memory));
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
-    BuildQuickShadowFrameVisitor shadow_frame_builder(sp, mh.IsStatic(), mh.GetShorty(),
-                                                      mh.GetShortyLength(),
+    uint32_t shorty_len = 0;
+    const char* shorty = method->GetShorty(&shorty_len);
+    BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
                                                       shadow_frame, first_arg_reg);
     shadow_frame_builder.VisitArguments();
     // Push a transition back into managed code onto the linked list in thread.
@@ -499,6 +495,8 @@
       }
     }
 
+    StackHandleScope<1> hs(self);
+    MethodHelper mh(hs.NewHandle(method));
     JValue result = interpreter::EnterInterpreterFromStub(self, mh, code_item, *shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
@@ -600,11 +598,13 @@
   jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
 
   // Placing arguments into args vector and remove the receiver.
-  MethodHelper proxy_mh(proxy_method);
-  DCHECK(!proxy_mh.IsStatic()) << PrettyMethod(proxy_method);
+  mirror::ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy();
+  CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
+      << PrettyMethod(non_proxy_method);
   std::vector<jvalue> args;
-  BuildQuickArgumentVisitor local_ref_visitor(sp, proxy_mh.IsStatic(), proxy_mh.GetShorty(),
-                                              proxy_mh.GetShortyLength(), &soa, &args);
+  uint32_t shorty_len = 0;
+  const char* shorty = proxy_method->GetShorty(&shorty_len);
+  BuildQuickArgumentVisitor local_ref_visitor(sp, false, shorty, shorty_len, &soa, &args);
 
   local_ref_visitor.VisitArguments();
   DCHECK_GT(args.size(), 0U) << PrettyMethod(proxy_method);
@@ -619,8 +619,7 @@
   // All naked Object*s should now be in jobjects, so its safe to go into the main invoke code
   // that performs allocations.
   self->EndAssertNoThreadSuspension(old_cause);
-  JValue result = InvokeProxyInvocationHandler(soa, proxy_mh.GetShorty(),
-                                               rcvr_jobj, interface_method_jobj, args);
+  JValue result = InvokeProxyInvocationHandler(soa, shorty, rcvr_jobj, interface_method_jobj, args);
   // Restore references which might have moved.
   local_ref_visitor.FixupReferences();
   return result.GetJ();
@@ -687,11 +686,8 @@
   if (called->IsRuntimeMethod()) {
     uint32_t dex_pc = caller->ToDexPc(QuickArgumentVisitor::GetCallingPc(sp));
     const DexFile::CodeItem* code;
-    {
-      MethodHelper mh(caller);
-      dex_file = &mh.GetDexFile();
-      code = mh.GetCodeItem();
-    }
+    dex_file = caller->GetDexFile();
+    code = caller->GetCodeItem();
     CHECK_LT(dex_pc, code->insns_size_in_code_units_);
     const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
     Instruction::Code instr_code = instr->Opcode();
@@ -747,7 +743,7 @@
 
   } else {
     invoke_type = kStatic;
-    dex_file = &MethodHelper(called).GetDexFile();
+    dex_file = called->GetDexFile();
     dex_method_idx = called->GetDexMethodIndex();
   }
   uint32_t shorty_len;
@@ -793,9 +789,10 @@
         // Calling from one dex file to another, need to compute the method index appropriate to
         // the caller's dex file. Since we get here only if the original called was a runtime
         // method, we've got the correct dex_file and a dex_method_idx from above.
-        DCHECK(&MethodHelper(caller).GetDexFile() == dex_file);
-        uint32_t method_index =
-            MethodHelper(called).FindDexMethodIndexInOtherDexFile(*dex_file, dex_method_idx);
+        DCHECK_EQ(caller->GetDexFile(), dex_file);
+        StackHandleScope<1> hs(self);
+        MethodHelper mh(hs.NewHandle(called));
+        uint32_t method_index = mh.FindDexMethodIndexInOtherDexFile(*dex_file, dex_method_idx);
         if (method_index != DexFile::kDexNoIndex) {
           caller->GetDexCacheResolvedMethods()->Set<false>(method_index, called);
         }
@@ -1198,10 +1195,8 @@
     size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
 
     sp8 -= scope_and_method;
-    // Align by kStackAlignment
-    uintptr_t sp_to_align = reinterpret_cast<uintptr_t>(sp8);
-    sp_to_align = RoundDown(sp_to_align, kStackAlignment);
-    sp8 = reinterpret_cast<uint8_t*>(sp_to_align);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
 
     uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
     *table = reinterpret_cast<HandleScope*>(sp8_table);
@@ -1221,9 +1216,8 @@
 
     // Next comes the native call stack.
     sp8 -= GetStackSize();
-    // Now align the call stack below. This aligns by 16, as AArch64 seems to require.
-    uintptr_t mask = ~0x0F;
-    sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
     *start_stack = reinterpret_cast<uintptr_t*>(sp8);
 
     // put fprs and gprs below
@@ -1507,10 +1501,9 @@
   DCHECK(called->IsNative()) << PrettyMethod(called, true);
 
   // run the visitor
-  MethodHelper mh(called);
-
-  BuildGenericJniFrameVisitor visitor(&sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(),
-                                      self);
+  uint32_t shorty_len = 0;
+  const char* shorty = called->GetShorty(&shorty_len);
+  BuildGenericJniFrameVisitor visitor(&sp, called->IsStatic(), shorty, shorty_len, self);
   visitor.VisitArguments();
   visitor.FinalizeHandleScope(self);
 
@@ -1551,7 +1544,7 @@
 
       // End JNI, as the assembly will move to deliver the exception.
       jobject lock = called->IsSynchronized() ? visitor.GetFirstHandleScopeJObject() : nullptr;
-      if (mh.GetShorty()[0] == 'L') {
+      if (shorty[0] == 'L') {
         artQuickGenericJniEndJNIRef(self, cookie, nullptr, lock);
       } else {
         artQuickGenericJniEndJNINonRef(self, cookie, lock);
@@ -1590,8 +1583,7 @@
     lock = table->GetHandle(0).ToJObject();
   }
 
-  MethodHelper mh(called);
-  char return_shorty_char = mh.GetShorty()[0];
+  char return_shorty_char = called->GetShorty()[0];
 
   if (return_shorty_char == 'L') {
     return artQuickGenericJniEndJNIRef(self, cookie, result.l, lock);
@@ -1623,70 +1615,19 @@
   }
 }
 
-// The following definitions create return types for two word-sized entities that will be passed
-// in registers so that memory operations for the interface trampolines can be avoided. The entities
-// are the resolved method and the pointer to the code to be invoked.
+// We use TwoWordReturn to optimize scalar returns: the hi value carries the code pointer and the
+// lo value the method pointer.
 //
-// On x86, ARM32 and MIPS, this is given for a *scalar* 64bit value. The definition thus *must* be
-// uint64_t or long long int. We use the upper 32b for code, and the lower 32b for the method.
-//
-// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a structs of two
-// size_t-sized values.
-//
-// We need two operations:
-//
-// 1) A flag value that signals failure. The assembly stubs expect the method part to be "0".
-//    GetFailureValue() will return a value that has method == 0.
-//
-// 2) A value that combines a code pointer and a method pointer.
-//    GetSuccessValue() constructs this.
-
-#if defined(__i386__) || defined(__arm__) || defined(__mips__)
-typedef uint64_t MethodAndCode;
-
-// Encodes method_ptr==nullptr and code_ptr==nullptr
-static constexpr MethodAndCode GetFailureValue() {
-  return 0;
-}
-
-// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
-static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  return ((code_uint << 32) | method_uint);
-}
-
-#elif defined(__x86_64__) || defined(__aarch64__)
-struct MethodAndCode {
-  uintptr_t method;
-  uintptr_t code;
-};
-
-// Encodes method_ptr==nullptr. Leaves random value in code pointer.
-static MethodAndCode GetFailureValue() {
-  MethodAndCode ret;
-  ret.method = 0;
-  return ret;
-}
-
-// Write values into their respective members.
-static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
-  MethodAndCode ret;
-  ret.method = reinterpret_cast<uintptr_t>(method);
-  ret.code = reinterpret_cast<uintptr_t>(code);
-  return ret;
-}
-#else
-#error "Unsupported architecture"
-#endif
+// It is valid to use this, as at the usage points here (returns from C functions) we are assumed
+// to hold the mutator lock (see the SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) annotations).
 
 template<InvokeType type, bool access_check>
-static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
                                      Thread* self, StackReference<mirror::ArtMethod>* sp);
 
 template<InvokeType type, bool access_check>
-static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
                                      Thread* self, StackReference<mirror::ArtMethod>* sp) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
@@ -1709,7 +1650,7 @@
 
     if (UNLIKELY(method == NULL)) {
       CHECK(self->IsExceptionPending());
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   }
   DCHECK(!self->IsExceptionPending());
@@ -1717,15 +1658,16 @@
 
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
-      << MethodHelper(method).GetDexFile().GetLocation();
+      << method->GetDexFile()->GetLocation();
 
-  return GetSuccessValue(code, method);
+  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
+                                reinterpret_cast<uintptr_t>(method));
 }
 
 // Explicit artInvokeCommon template function declarations to please analysis tool.
 #define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check)                                \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
-  MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
+  TwoWordReturn artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
                                                     mirror::Object* this_object,                \
                                                     mirror::ArtMethod* caller_method,           \
                                                     Thread* self,                               \
@@ -1745,7 +1687,7 @@
 
 
 // See comments in runtime_support_asm.S
-extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1754,7 +1696,7 @@
 }
 
 
-extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1762,7 +1704,7 @@
   return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1770,7 +1712,7 @@
   return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1778,7 +1720,7 @@
   return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
+extern "C" TwoWordReturn artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
     mirror::Object* this_object,
     mirror::ArtMethod* caller_method,
     Thread* self,
@@ -1787,7 +1729,7 @@
 }
 
 // Determine target of interface dispatch. This object is known non-null.
-extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
+extern "C" TwoWordReturn artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
                                                       mirror::Object* this_object,
                                                       mirror::ArtMethod* caller_method,
                                                       Thread* self,
@@ -1800,73 +1742,19 @@
       FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   } else {
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
     DCHECK(interface_method == Runtime::Current()->GetResolutionMethod());
-    // Determine method index from calling dex instruction.
-#if defined(__arm__)
-    // On entry the stack pointed by sp is:
-    // | argN       |  |
-    // | ...        |  |
-    // | arg4       |  |
-    // | arg3 spill |  |  Caller's frame
-    // | arg2 spill |  |
-    // | arg1 spill |  |
-    // | Method*    | ---
-    // | LR         |
-    // | ...        |    callee saves
-    // | R3         |    arg3
-    // | R2         |    arg2
-    // | R1         |    arg1
-    // | R0         |
-    // | Method*    |  <- sp
-    DCHECK_EQ(48U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + kPointerSize);
-    uintptr_t caller_pc = regs[10];
-#elif defined(__i386__)
-    // On entry the stack pointed by sp is:
-    // | argN        |  |
-    // | ...         |  |
-    // | arg4        |  |
-    // | arg3 spill  |  |  Caller's frame
-    // | arg2 spill  |  |
-    // | arg1 spill  |  |
-    // | Method*     | ---
-    // | Return      |
-    // | EBP,ESI,EDI |    callee saves
-    // | EBX         |    arg3
-    // | EDX         |    arg2
-    // | ECX         |    arg1
-    // | EAX/Method* |  <- sp
-    DCHECK_EQ(32U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp));
-    uintptr_t caller_pc = regs[7];
-#elif defined(__mips__)
-    // On entry the stack pointed by sp is:
-    // | argN       |  |
-    // | ...        |  |
-    // | arg4       |  |
-    // | arg3 spill |  |  Caller's frame
-    // | arg2 spill |  |
-    // | arg1 spill |  |
-    // | Method*    | ---
-    // | RA         |
-    // | ...        |    callee saves
-    // | A3         |    arg3
-    // | A2         |    arg2
-    // | A1         |    arg1
-    // | A0/Method* |  <- sp
-    DCHECK_EQ(64U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
-    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp));
-    uintptr_t caller_pc = regs[15];
-#else
-    UNIMPLEMENTED(FATAL);
-    uintptr_t caller_pc = 0;
-#endif
+
+    // Find the caller PC.
+    constexpr size_t pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs);
+    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + pc_offset);
+
+    // Map the caller PC to a dex PC.
     uint32_t dex_pc = caller_method->ToDexPc(caller_pc);
-    const DexFile::CodeItem* code = MethodHelper(caller_method).GetCodeItem();
+    const DexFile::CodeItem* code = caller_method->GetCodeItem();
     CHECK_LT(dex_pc, code->insns_size_in_code_units_);
     const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
     Instruction::Code instr_code = instr->Opcode();
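Note: the hunk above replaces the hand-maintained, per-architecture stack diagrams with a single constexpr offset lookup. A minimal sketch of the pattern, where ReadRefsAndArgsCallerPc is a hypothetical helper (not part of this change) and byte is ART's byte typedef:

    // Hypothetical helper: with GetCalleeSavePCOffset constexpr per ISA, reading the
    // caller's return PC out of a kRefsAndArgs callee-save frame is a single load.
    static inline uintptr_t ReadRefsAndArgsCallerPc(mirror::ArtMethod** sp) {
      constexpr size_t pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs);
      return *reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + pc_offset);
    }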
@@ -1897,16 +1785,17 @@
 
     if (UNLIKELY(method == nullptr)) {
       CHECK(self->IsExceptionPending());
-      return GetFailureValue();  // Failure.
+      return GetTwoWordFailureValue();  // Failure.
     }
   }
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
-      << MethodHelper(method).GetDexFile().GetLocation();
+      << method->GetDexFile()->GetLocation();
 
-  return GetSuccessValue(code, method);
+  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
+                                reinterpret_cast<uintptr_t>(method));
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
new file mode 100644
index 0000000..66ee218
--- /dev/null
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include "callee_save_frame.h"
+#include "common_runtime_test.h"
+#include "mirror/art_method-inl.h"
+#include "quick/quick_method_frame_info.h"
+
+namespace art {
+
+class QuickTrampolineEntrypointsTest : public CommonRuntimeTest {
+ protected:
+  static mirror::ArtMethod* CreateCalleeSaveMethod(InstructionSet isa,
+                                                   Runtime::CalleeSaveType type)
+      NO_THREAD_SAFETY_ANALYSIS {
+    Runtime* r = Runtime::Current();
+
+    Thread* t = Thread::Current();
+    t->TransitionFromSuspendedToRunnable();  // So we can create callee-save methods.
+
+    r->SetInstructionSet(isa);
+    mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(type);
+    r->SetCalleeSaveMethod(save_method, type);
+
+    t->TransitionFromRunnableToSuspended(ThreadState::kNative);  // So we can shut down.
+
+    return save_method;
+  }
+
+  static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real sizes differ for "
+        << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
+        << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+  }
+
+  static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    EXPECT_EQ(save_method->GetReturnPcOffsetInBytes(), pc_offset) << "Expected and real pc offsets"
+        " differ for " << type << " core spills=" << std::hex << frame_info.CoreSpillMask() <<
+        " fp spills=" << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+  }
+};
+
+// Note: these tests are all runtime tests. They let the Runtime create the corresponding ArtMethod
+// and check against it. Technically we know and expect certain values, but the Runtime code is
+// not constexpr, so we cannot make these checks at compile time (and I want the Runtime code tested).
+
+// This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
+TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
+  // We have to use a define here as the callee_save_frame.h functions are constexpr.
+#define CHECK_FRAME_SIZE(isa)                                                                     \
+  CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
+  CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly));       \
+  CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+
+  CHECK_FRAME_SIZE(kArm);
+  CHECK_FRAME_SIZE(kArm64);
+  CHECK_FRAME_SIZE(kMips);
+  CHECK_FRAME_SIZE(kX86);
+  CHECK_FRAME_SIZE(kX86_64);
+}
+
+// This test ensures that GetConstExprPointerSize is correct with respect to
+// GetInstructionSetPointerSize.
+TEST_F(QuickTrampolineEntrypointsTest, PointerSize) {
+  EXPECT_EQ(GetInstructionSetPointerSize(kArm), GetConstExprPointerSize(kArm));
+  EXPECT_EQ(GetInstructionSetPointerSize(kArm64), GetConstExprPointerSize(kArm64));
+  EXPECT_EQ(GetInstructionSetPointerSize(kMips), GetConstExprPointerSize(kMips));
+  EXPECT_EQ(GetInstructionSetPointerSize(kX86), GetConstExprPointerSize(kX86));
+  EXPECT_EQ(GetInstructionSetPointerSize(kX86_64), GetConstExprPointerSize(kX86_64));
+}
+
+// This test ensures that the constexpr specialization of the return PC offset computation in
+// GetCalleeSavePCOffset is correct.
+TEST_F(QuickTrampolineEntrypointsTest, ReturnPC) {
+  // Ensure that the computation in callee_save_frame.h is correct.
+  // Note: we can only check against kRuntimeISA, because the ArtMethod computation uses
+  // kPointerSize, which is wrong when the target bitwidth is not the same as the host's.
+  CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
+                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kSaveAll));
+}
+
+}  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f33befb..0dd33cf 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -233,8 +233,7 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShlLong, pShrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShrLong, pUshrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUshrLong, pIndexOf, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pMemcmp16, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcmp16, pStringCompareTo, kPointerSize);
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pStringCompareTo, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringCompareTo, pMemcpy, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcpy, pQuickImtConflictTrampoline, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickImtConflictTrampoline, pQuickResolutionTrampoline,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 6b216c7..3112bc0 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -67,7 +67,7 @@
   action.sa_sigaction = art_fault_handler;
   sigemptyset(&action.sa_mask);
   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__mips__)
+#if !defined(__APPLE__) && !defined(__mips__)
   action.sa_restorer = nullptr;
 #endif
 
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 97d3c2f..026f5b9 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -29,7 +29,7 @@
 
 namespace mirror {
 class ArtMethod;
-}       // namespace mirror
+}  // namespace mirror
 
 class FaultHandler;
 
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index bd04473..2c72ba1 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -49,10 +49,7 @@
     front_index_.StoreRelaxed(0);
     back_index_.StoreRelaxed(0);
     debug_is_sorted_ = true;
-    int result = madvise(begin_, sizeof(T) * capacity_, MADV_DONTNEED);
-    if (result == -1) {
-      PLOG(WARNING) << "madvise failed";
-    }
+    mem_map_->MadviseDontNeedAndZero();
   }
 
   // Beware: Mixing atomic pushes and atomic pops will cause ABA problem.
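Note: several GC call sites in this change delegate to MemMap::MadviseDontNeedAndZero(), which the diff itself does not show. A plausible sketch, assuming kMadviseZeroes records whether MADV_DONTNEED hands back zeroed pages on the platform; the member names are guesses based on the BaseBegin()/BaseSize() accessors used elsewhere in this change:

    // Sketch only: zero explicitly where madvise will not, then release the pages.
    void MemMap::MadviseDontNeedAndZero() {
      if (base_size_ != 0) {
        if (!kMadviseZeroes) {
          memset(base_begin_, 0, base_size_);
        }
        if (madvise(base_begin_, base_size_, MADV_DONTNEED) == -1) {
          PLOG(WARNING) << "madvise failed";
        }
      }
    }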
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 43a173e..a95c003 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -96,7 +96,7 @@
 
 void CardTable::ClearCardTable() {
   COMPILE_ASSERT(kCardClean == 0, clean_card_must_be_0);
-  madvise(mem_map_->Begin(), mem_map_->Size(), MADV_DONTNEED);
+  mem_map_->MadviseDontNeedAndZero();
 }
 
 bool CardTable::AddrIsInCardTable(const void* addr) const {
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 3cb8d94..224b33e 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -72,13 +72,15 @@
 }
 
 template<size_t kAlignment>
+std::string SpaceBitmap<kAlignment>::Dump() const {
+  return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
+                      reinterpret_cast<void*>(HeapLimit()));
+}
+
+template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::Clear() {
-  if (bitmap_begin_ != NULL) {
-    // This returns the memory to the system.  Successive page faults will return zeroed memory.
-    int result = madvise(bitmap_begin_, bitmap_size_, MADV_DONTNEED);
-    if (result == -1) {
-      PLOG(FATAL) << "madvise failed";
-    }
+  if (bitmap_begin_ != nullptr) {
+    mem_map_->MadviseDontNeedAndZero();
   }
 }
 
@@ -180,11 +182,10 @@
   if (fields != NULL) {
     for (int32_t i = 0; i < fields->GetLength(); i++) {
       mirror::ArtField* field = fields->Get(i);
-      FieldHelper fh(field);
-      if (!fh.IsPrimitiveType()) {
+      if (!field->IsPrimitiveType()) {
         mirror::Object* value = field->GetObj(obj);
         if (value != NULL) {
-          WalkFieldsInOrder(visited, callback, value,  arg);
+          WalkFieldsInOrder(visited, callback, value, arg);
         }
       }
     }
@@ -210,8 +211,7 @@
     if (fields != NULL) {
       for (int32_t i = 0; i < fields->GetLength(); i++) {
         mirror::ArtField* field = fields->Get(i);
-        FieldHelper fh(field);
-        if (!fh.IsPrimitiveType()) {
+        if (!field->IsPrimitiveType()) {
           mirror::Object* value = field->GetObj(NULL);
           if (value != NULL) {
             WalkFieldsInOrder(visited, callback, value, arg);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 50d15c6..0849171 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -26,7 +26,6 @@
 #include "base/mutex.h"
 #include "gc_allocator.h"
 #include "globals.h"
-#include "mem_map.h"
 #include "object_callbacks.h"
 
 namespace art {
@@ -34,6 +33,7 @@
 namespace mirror {
   class Object;
 }  // namespace mirror
+class MemMap;
 
 namespace gc {
 namespace accounting {
@@ -183,10 +183,7 @@
     name_ = name;
   }
 
-  std::string Dump() const {
-    return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
-                        reinterpret_cast<void*>(HeapLimit()));
-  }
+  std::string Dump() const;
 
   const void* GetObjectWordAddress(const mirror::Object* obj) const {
     uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 10b88b3..55262f2 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1507,6 +1507,9 @@
     if (madvise_size > 0) {
       DCHECK_ALIGNED(madvise_begin, kPageSize);
       DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
+      if (!kMadviseZeroes) {
+        memset(madvise_begin, 0, madvise_size);
+      }
       CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
     }
     if (madvise_begin - zero_begin) {
@@ -2117,6 +2120,9 @@
           start = reinterpret_cast<byte*>(fpr) + kPageSize;
         }
         byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
+        if (!kMadviseZeroes) {
+          memset(start, 0, end - start);
+        }
         CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
         reclaimed_bytes += fpr_size;
         size_t num_pages = fpr_size / kPageSize;
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 9464331..a439188 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -110,11 +110,17 @@
         byte_size -= kPageSize;
         if (byte_size > 0) {
           if (release_pages) {
+            if (!kMadviseZeroes) {
+              memset(start, 0, byte_size);
+            }
             madvise(start, byte_size, MADV_DONTNEED);
           }
         }
       } else {
         if (release_pages) {
+          if (!kMadviseZeroes) {
+            memset(start, 0, byte_size);
+          }
           madvise(start, byte_size, MADV_DONTNEED);
         }
       }
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index c062706..890036b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1130,9 +1130,7 @@
   allocations->Reset();
   timings_.EndSplit();
 
-  int success = madvise(sweep_array_free_buffer_mem_map_->BaseBegin(),
-                        sweep_array_free_buffer_mem_map_->BaseSize(), MADV_DONTNEED);
-  DCHECK_EQ(success, 0) << "Failed to madvise the sweep array free buffer pages.";
+  sweep_array_free_buffer_mem_map_->MadviseDontNeedAndZero();
 }
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 26f87ca..f5f7a86 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -150,6 +150,7 @@
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
       verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
+      last_gc_time_ns_(NanoTime()),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
        * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
@@ -2463,7 +2464,7 @@
 }
 
 size_t Heap::GetPercentFree() {
-  return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / GetMaxMemory());
+  return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / max_allowed_footprint_);
 }
 
 void Heap::SetIdealFootprint(size_t max_allowed_footprint) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e568b36..9b49373 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -408,7 +408,7 @@
 
   // Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    return GetMaxMemory() - num_bytes_allocated_.LoadSequentiallyConsistent();
+    return max_allowed_footprint_ - num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
   // get the space that corresponds to an object's address. Current implementation searches all
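Worked example of the changed accounting: with max_allowed_footprint_ at 64 MB and 16 MB allocated, GetFreeMemory() now returns 48 MB and GetPercentFree() returns 75 (illustrative numbers). Both are measured against the current soft footprint limit rather than GetMaxMemory()'s absolute capacity, so freeMemory reports the headroom available before the footprint must grow.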
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index a58df8e..7988af7 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -61,15 +61,20 @@
     }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
-    // it to the mutator as long as the GC is not preserving references. If the GC is
-    // preserving references, the mutator could take a white field and move it somewhere else
-    // in the heap causing corruption since this field would get swept.
+    // it to the mutator as long as the GC is not preserving references.
     IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
-    if (!preserving_references_ && is_marked_callback != nullptr) {
+    if (LIKELY(is_marked_callback != nullptr)) {
       mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block.
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
+      // can return it to the mutator as long as the GC is not preserving references, in which
+      // case only black nodes can be safely returned. If the GC is preserving references, the
+      // mutator could take a white field from a grey or white node and move it somewhere else
+      // in the heap causing corruption since this field would get swept.
       if (obj != nullptr) {
-        return obj;
+        if (!preserving_references_ ||
+            (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
+          return obj;
+        }
       }
     }
     condition_.WaitHoldingLocks(self);
@@ -113,14 +118,14 @@
   timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences");
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
-    TimingLogger::ScopedSplit split(concurrent ? "PreserveSomeSoftReferences" :
-        "(Paused)PreserveSomeSoftReferences", timings);
+    TimingLogger::ScopedSplit split(concurrent ? "ForwardSoftReferences" :
+        "(Paused)ForwardSoftReferences", timings);
     if (concurrent) {
       StartPreservingReferences(self);
     }
-    // References with a marked referent are removed from the list.
-    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
-                                                     &process_references_args_);
+
+    soft_reference_queue_.ForwardSoftReferences(&PreserveSoftReferenceCallback,
+                                                &process_references_args_);
     process_mark_stack_callback(arg);
     if (concurrent) {
       StopPreservingReferences(self);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index caacef5..3910c29 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -160,22 +160,23 @@
   }
 }
 
-void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg) {
-  ReferenceQueue cleared;
-  while (!IsEmpty()) {
-    mirror::Reference* ref = DequeuePendingReference();
+void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback,
+                                           void* arg) {
+  if (UNLIKELY(IsEmpty())) {
+    return;
+  }
+  mirror::Reference* const head = list_;
+  mirror::Reference* ref = head;
+  do {
     mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address == nullptr) {
-        // Either the reference isn't marked or we don't wish to preserve it.
-        cleared.EnqueuePendingReference(ref);
-      } else if (forward_address != referent) {
+      if (forward_address != nullptr && forward_address != referent) {
         ref->SetReferent<false>(forward_address);
       }
     }
-  }
-  list_ = cleared.GetList();
+    ref = ref->GetPendingNext();
+  } while (LIKELY(ref != head));
 }
 
 }  // namespace gc
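Design note on the rewrite above: ForwardSoftReferences now walks the pending list in place instead of dequeuing every reference into a temporary ReferenceQueue and rebuilding list_. The do/while against the saved head relies on the pending list being circular through GetPendingNext(), so each reference is visited exactly once; references are no longer removed from the list here, and only referents that are preserved and have moved get forwarded.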
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 4f223e2..1d8cc1a 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -65,7 +65,7 @@
-  // Walks the reference list marking any references subject to the reference clearing policy.
-  // References with a black referent are removed from the list.  References with white referents
-  // biased toward saving are blackened and also removed from the list.
+  // Walks the reference list, invoking the preserve callback on every referent and forwarding
+  // (updating in place) any referent that is preserved and has moved. The list itself is left
+  // intact.
-  void PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
+  void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index fd0a92d..8b35692 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -64,6 +64,9 @@
 
 void BumpPointerSpace::Clear() {
   // Release the pages back to the operating system.
+  if (!kMadviseZeroes) {
+    memset(Begin(), 0, Limit() - Begin());
+  }
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 3d35c00..61633cd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -18,6 +18,7 @@
 
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
+#include "base/scoped_flock.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/class-inl.h"
@@ -148,7 +149,17 @@
   std::string image_filename;
   std::string error_msg;
   bool is_system = false;
-  if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
+  const bool found_image = FindImageFilename(image_location, image_isa, &image_filename,
+                                             &is_system);
+
+  // Note that we must not use the file descriptor associated with
+  // ScopedFlock::GetFile to Init the image file. We want the file
+  // descriptor (and the associated exclusive lock) to be released when
+  // we leave Create.
+  ScopedFlock image_lock;
+  image_lock.Init(image_filename.c_str(), &error_msg);
+
+  if (found_image) {
     ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system,
                                          &error_msg);
     if (space != nullptr) {
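A minimal usage sketch of the ScopedFlock pattern assumed above (return-value handling omitted for brevity, as the Init call above also ignores it):

    {
      ScopedFlock lock;
      std::string error_msg;
      lock.Init(image_filename.c_str(), &error_msg);  // Takes an exclusive flock on the file.
      // ... work on the image while no other process can race us ...
    }  // The lock and its file descriptor are released on scope exit, as the comment requires.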
diff --git a/runtime/globals.h b/runtime/globals.h
index 07fadb9..58c2118 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -36,13 +36,6 @@
 static constexpr size_t kWordSize = sizeof(word);
 static constexpr size_t kPointerSize = sizeof(void*);
 
-// Architecture-specific pointer sizes
-static constexpr size_t kArmPointerSize = 4;
-static constexpr size_t kArm64PointerSize = 8;
-static constexpr size_t kMipsPointerSize = 4;
-static constexpr size_t kX86PointerSize = 4;
-static constexpr size_t kX86_64PointerSize = 8;
-
 static constexpr size_t kBitsPerByte = 8;
 static constexpr size_t kBitsPerByteLog2 = 3;
 static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
@@ -51,20 +44,6 @@
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
 
-// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
-// but ARM ELF requires 8..
-static constexpr size_t kArmAlignment = 8;
-
-// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kArm64Alignment = 16;
-
-// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
-// TODO: Can this be 4?
-static constexpr size_t kMipsAlignment = 8;
-
-// X86 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kX86Alignment = 16;
-
 // System page size. We check this against sysconf(_SC_PAGE_SIZE) at runtime, but use a simple
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
diff --git a/runtime/handle.h b/runtime/handle.h
index b70f651..7e13601 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -26,18 +26,20 @@
 
 class Thread;
 
+template<class T> class Handle;
+
 template<class T>
-class Handle {
+class ConstHandle {
  public:
-  Handle() : reference_(nullptr) {
+  ConstHandle() : reference_(nullptr) {
   }
-  Handle(const Handle<T>& handle) ALWAYS_INLINE : reference_(handle.reference_) {
+  ConstHandle(const ConstHandle<T>& handle) ALWAYS_INLINE : reference_(handle.reference_) {
   }
-  Handle<T>& operator=(const Handle<T>& handle) ALWAYS_INLINE {
+  ConstHandle<T>& operator=(const ConstHandle<T>& handle) ALWAYS_INLINE {
     reference_ = handle.reference_;
     return *this;
   }
-  explicit Handle(StackReference<T>* reference) ALWAYS_INLINE : reference_(reference) {
+  explicit ConstHandle(StackReference<T>* reference) ALWAYS_INLINE : reference_(reference) {
   }
   T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
     return *Get();
@@ -48,11 +50,6 @@
   T* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
     return reference_->AsMirrorPtr();
   }
-  T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
-    T* old = reference_->AsMirrorPtr();
-    reference_->Assign(reference);
-    return old;
-  }
   jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
       // Special case so that we work with NullHandles.
@@ -65,17 +62,62 @@
   StackReference<T>* reference_;
 
   template<typename S>
-  explicit Handle(StackReference<S>* reference)
+  explicit ConstHandle(StackReference<S>* reference)
       : reference_(reinterpret_cast<StackReference<T>*>(reference)) {
   }
   template<typename S>
-  explicit Handle(const Handle<S>& handle)
+  explicit ConstHandle(const ConstHandle<S>& handle)
       : reference_(reinterpret_cast<StackReference<T>*>(handle.reference_)) {
   }
 
   StackReference<T>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
     return reference_;
   }
+  const StackReference<T>* GetReference() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    return reference_;
+  }
+
+ private:
+  friend class BuildGenericJniFrameVisitor;
+  template<class S> friend class ConstHandle;
+  friend class HandleScope;
+  template<class S> friend class HandleWrapper;
+  template<size_t kNumReferences> friend class StackHandleScope;
+};
+
+template<class T>
+class Handle : public ConstHandle<T> {
+ public:
+  Handle() {
+  }
+  Handle(const Handle<T>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE
+      : ConstHandle<T>(handle.reference_) {
+  }
+  Handle<T>& operator=(const Handle<T>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    ConstHandle<T>::operator=(handle);
+    return *this;
+  }
+  explicit Handle(StackReference<T>* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE : ConstHandle<T>(reference) {
+  }
+  T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    StackReference<T>* ref = ConstHandle<T>::GetReference();
+    T* const old = ref->AsMirrorPtr();
+    ref->Assign(reference);
+    return old;
+  }
+
+ protected:
+  template<typename S>
+  explicit Handle(StackReference<S>* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ConstHandle<T>(reference) {
+  }
+  template<typename S>
+  explicit Handle(const Handle<S>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ConstHandle<T>(handle) {
+  }
 
  private:
   friend class BuildGenericJniFrameVisitor;
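The Handle/ConstHandle split above gives read-only call sites a type that cannot reassign the slot. A hypothetical pair of call sites illustrating the intent:

    // Both types can Get(); only Handle<T> exposes Assign().
    void Inspect(ConstHandle<mirror::Class> h) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      LOG(INFO) << PrettyClass(h.Get());
    }
    void Replace(Handle<mirror::Class> h, mirror::Class* klass)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      h.Assign(klass);  // Would not compile with ConstHandle<mirror::Class>.
    }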
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 91f1718..33339f8 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -919,8 +919,6 @@
 
       rec->AddU2(0);  // empty const pool
 
-      FieldHelper fh;
-
       // Static fields
       if (sFieldCount == 0) {
         rec->AddU2((uint16_t)0);
@@ -932,11 +930,10 @@
 
         for (size_t i = 0; i < sFieldCount; ++i) {
           mirror::ArtField* f = thisClass->GetStaticField(i);
-          fh.ChangeField(f);
 
           size_t size;
-          HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
-          rec->AddStringId(LookupStringId(fh.GetName()));
+          HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
+          rec->AddStringId(LookupStringId(f->GetName()));
           rec->AddU1(t);
           if (size == 1) {
             rec->AddU1(static_cast<uint8_t>(f->Get32(thisClass)));
@@ -957,9 +954,8 @@
       rec->AddU2((uint16_t)iFieldCount);
       for (int i = 0; i < iFieldCount; ++i) {
         mirror::ArtField* f = thisClass->GetInstanceField(i);
-        fh.ChangeField(f);
-        HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), NULL);
-        rec->AddStringId(LookupStringId(fh.GetName()));
+        HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), NULL);
+        rec->AddStringId(LookupStringId(f->GetName()));
         rec->AddU1(t);
       }
     } else if (c->IsArrayClass()) {
@@ -1015,14 +1011,12 @@
       // Write the instance data;  fields for this class, followed by super class fields,
       // and so on. Don't write the klass or monitor fields of Object.class.
       mirror::Class* sclass = c;
-      FieldHelper fh;
       while (!sclass->IsObjectClass()) {
         int ifieldCount = sclass->NumInstanceFields();
         for (int i = 0; i < ifieldCount; ++i) {
           mirror::ArtField* f = sclass->GetInstanceField(i);
-          fh.ChangeField(f);
           size_t size;
-          SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
+          SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
           if (size == 1) {
             rec->AddU1(f->Get32(obj));
           } else if (size == 2) {
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index c1931a9..5b60396 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -16,9 +16,6 @@
 
 #include "instruction_set.h"
 
-#include "globals.h"
-#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
-
 namespace art {
 
 const char* GetInstructionSetString(const InstructionSet isa) {
@@ -63,75 +60,6 @@
   return kNone;
 }
 
-size_t GetInstructionSetPointerSize(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return kArmPointerSize;
-    case kArm64:
-      return kArm64PointerSize;
-    case kX86:
-      return kX86PointerSize;
-    case kX86_64:
-      return kX86_64PointerSize;
-    case kMips:
-      return kMipsPointerSize;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have pointer size.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 4;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 8;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
 size_t GetInstructionSetAlignment(InstructionSet isa) {
   switch (isa) {
     case kArm:
@@ -155,27 +83,6 @@
   }
 }
 
-bool Is64BitInstructionSet(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-    case kX86:
-    case kMips:
-      return false;
-
-    case kArm64:
-    case kX86_64:
-      return true;
-
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have bit width.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
 std::string InstructionSetFeatures::GetFeatureString() const {
   std::string result;
   if ((mask_ & kHwDiv) != 0) {
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 679c575..67e7100 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -20,6 +20,7 @@
 #include <iosfwd>
 #include <string>
 
+#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
 
 namespace art {
@@ -35,14 +36,122 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+// Architecture-specific pointer sizes
+static constexpr size_t kArmPointerSize = 4;
+static constexpr size_t kArm64PointerSize = 8;
+static constexpr size_t kMipsPointerSize = 4;
+static constexpr size_t kX86PointerSize = 4;
+static constexpr size_t kX86_64PointerSize = 8;
+
+// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
+// but ARM ELF requires 8.
+static constexpr size_t kArmAlignment = 8;
+
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
+
+// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
+// TODO: Can this be 4?
+static constexpr size_t kMipsAlignment = 8;
+
+// X86 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kX86Alignment = 16;
+
 const char* GetInstructionSetString(InstructionSet isa);
 InstructionSet GetInstructionSetFromString(const char* instruction_set);
 
-size_t GetInstructionSetPointerSize(InstructionSet isa);
+static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kArmPointerSize;
+    case kArm64:
+      return kArm64PointerSize;
+    case kX86:
+      return kX86PointerSize;
+    case kX86_64:
+      return kX86_64PointerSize;
+    case kMips:
+      return kMipsPointerSize;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have pointer size.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
 size_t GetInstructionSetAlignment(InstructionSet isa);
-bool Is64BitInstructionSet(InstructionSet isa);
-size_t GetBytesPerGprSpillLocation(InstructionSet isa);
-size_t GetBytesPerFprSpillLocation(InstructionSet isa);
+
+static inline bool Is64BitInstructionSet(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+    case kMips:
+      return false;
+
+    case kArm64:
+    case kX86_64:
+      return true;
+
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have bit width.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
+static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 4;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
+
+static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 8;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;
+  }
+}
 
 #if defined(__arm__)
 static constexpr InstructionSet kRuntimeISA = kArm;
@@ -107,6 +216,68 @@
   uint32_t mask_;
 };
 
+// The following definitions create return types for two word-sized entities that will be passed
+// in registers so that memory operations for the interface trampolines can be avoided. The entities
+// are the resolved method and the pointer to the code to be invoked.
+//
+// On x86, ARM32 and MIPS, this is given for a *scalar* 64-bit value. The definition thus *must* be
+// uint64_t or long long int.
+//
+// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a struct of two
+// size_t-sized values.
+//
+// We need two operations:
+//
+// 1) A flag value that signals failure. The assembly stubs expect the lower part to be "0".
+//    GetTwoWordFailureValue() will return a value that has lower part == 0.
+//
+// 2) A value that combines two word-sized values.
+//    GetTwoWordSuccessValue() constructs this.
+//
+// IMPORTANT: If you use this to transfer object pointers, it is your responsibility to ensure
+//            that the object does not move or the value is updated. Simple use of this is NOT SAFE
+//            when the garbage collector can move objects concurrently. Ensure that required locks
+//            are held when using!
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+typedef uint64_t TwoWordReturn;
+
+// Encodes method_ptr==nullptr and code_ptr==nullptr
+static inline constexpr TwoWordReturn GetTwoWordFailureValue() {
+  return 0;
+}
+
+// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  uint32_t lo32 = static_cast<uint32_t>(lo);
+  uint64_t hi64 = static_cast<uint64_t>(hi);
+  return ((hi64 << 32) | lo32);
+}
+
+#elif defined(__x86_64__) || defined(__aarch64__)
+struct TwoWordReturn {
+  uintptr_t lo;
+  uintptr_t hi;
+};
+
+// Encodes method_ptr==nullptr. Leaves random value in code pointer.
+static inline TwoWordReturn GetTwoWordFailureValue() {
+  TwoWordReturn ret;
+  ret.lo = 0;
+  return ret;
+}
+
+// Write values into their respective members.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  TwoWordReturn ret;
+  ret.lo = lo;
+  ret.hi = hi;
+  return ret;
+}
+#else
+#error "Unsupported architecture"
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_INSTRUCTION_SET_H_
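On the 32-bit targets the success value is a single packed 64-bit scalar; a worked example with illustrative addresses:

    uintptr_t code = 0x1000;    // Illustrative code entry point.
    uintptr_t method = 0x2000;  // Illustrative ArtMethod* value.
    TwoWordReturn r = GetTwoWordSuccessValue(code, method);
    // r == UINT64_C(0x0000100000002000): code in the upper 32 bits, method in the lower 32 bits.
    // The assembly stubs test the lower word; GetTwoWordFailureValue() keeps it 0 to signal failure.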
diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc
index cd6337c..ece3238 100644
--- a/runtime/instruction_set_test.cc
+++ b/runtime/instruction_set_test.cc
@@ -45,4 +45,8 @@
   EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
 }
 
+TEST_F(InstructionSetTest, PointerSize) {
+  EXPECT_EQ(kPointerSize, GetInstructionSetPointerSize(kRuntimeISA));
+}
+
 }  // namespace art
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 194cb18..8f5da83 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -780,24 +780,20 @@
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          mirror::ArtField* field) const {
-  if (have_field_read_listeners_) {
-    // TODO: same comment than DexPcMovedEventImpl.
-    std::list<InstrumentationListener*> copy(field_read_listeners_);
-    for (InstrumentationListener* listener : copy) {
-      listener->FieldRead(thread, this_object, method, dex_pc, field);
-    }
+  // TODO: same comment as DexPcMovedEventImpl.
+  std::list<InstrumentationListener*> copy(field_read_listeners_);
+  for (InstrumentationListener* listener : copy) {
+    listener->FieldRead(thread, this_object, method, dex_pc, field);
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          mirror::ArtField* field, const JValue& field_value) const {
-  if (have_field_write_listeners_) {
-    // TODO: same comment than DexPcMovedEventImpl.
-    std::list<InstrumentationListener*> copy(field_write_listeners_);
-    for (InstrumentationListener* listener : copy) {
-      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
-    }
+  // TODO: same comment as DexPcMovedEventImpl.
+  std::list<InstrumentationListener*> copy(field_write_listeners_);
+  for (InstrumentationListener* listener : copy) {
+    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
   }
 }
 
@@ -805,8 +801,9 @@
                                            mirror::ArtMethod* catch_method,
                                            uint32_t catch_dex_pc,
                                            mirror::Throwable* exception_object) const {
-  if (have_exception_caught_listeners_) {
-    DCHECK_EQ(thread->GetException(NULL), exception_object);
+  if (HasExceptionCaughtListeners()) {
+    DCHECK_EQ(thread->GetException(nullptr), exception_object);
+    bool is_exception_reported = thread->IsExceptionReportedToInstrumentation();
     thread->ClearException();
     // TODO: The copy below is due to the debug listener having an action where it can remove
     // itself as a listener and break the iterator. The copy only works around the problem.
@@ -815,6 +812,7 @@
       listener->ExceptionCaught(thread, throw_location, catch_method, catch_dex_pc, exception_object);
     }
     thread->SetException(throw_location, exception_object);
+    thread->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
 }
 
@@ -846,8 +844,9 @@
   MethodEnterEvent(self, this_object, method, 0);
 }
 
-uint64_t Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                                       uint64_t gpr_result, uint64_t fpr_result) {
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
+                                                            uint64_t gpr_result,
+                                                            uint64_t fpr_result) {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -859,7 +858,8 @@
   CheckStackDepth(self, instrumentation_frame, 0);
 
   mirror::ArtMethod* method = instrumentation_frame.method_;
-  char return_shorty = MethodHelper(method).GetShorty()[0];
+  uint32_t length;
+  char return_shorty = method->GetShorty(&length)[0];
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
@@ -889,14 +889,14 @@
                 << " result is " << std::hex << return_value.GetJ();
     }
     self->SetDeoptimizationReturnValue(return_value);
-    return static_cast<uint64_t>(GetQuickDeoptimizationEntryPoint()) |
-        (static_cast<uint64_t>(*return_pc) << 32);
+    return GetTwoWordSuccessValue(*return_pc,
+                                  reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
   } else {
     if (kVerboseInstrumentation) {
       LOG(INFO) << "Returning from " << PrettyMethod(method)
                 << " to PC " << reinterpret_cast<void*>(*return_pc);
     }
-    return *return_pc;
+    return GetTwoWordSuccessValue(0, *return_pc);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 5630862..d0cb4de 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,15 +17,16 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include "atomic.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "object_callbacks.h"
-
 #include <stdint.h>
 #include <set>
 #include <list>
 
+#include "atomic.h"
+#include "instruction_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "object_callbacks.h"
+
 namespace art {
 namespace mirror {
   class ArtField;
@@ -236,6 +237,10 @@
     return have_field_write_listeners_;
   }
 
+  bool HasExceptionCaughtListeners() const {
+    return have_exception_caught_listeners_;
+  }
+
   bool IsActive() const {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
@@ -311,8 +316,8 @@
 
   // Called when an instrumented method is exited. Removes the pushed instrumentation frame
   // returning the intended link register. Generates method exit events.
-  uint64_t PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc, uint64_t gpr_result,
-                                        uint64_t fpr_result)
+  TwoWordReturn PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
+                                             uint64_t gpr_result, uint64_t fpr_result)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 9cfba8d..6dbc6a0 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -400,8 +400,7 @@
   }
 
   const char* old_cause = self->StartAssertNoThreadSuspension("EnterInterpreterFromInvoke");
-  MethodHelper mh(method);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
   uint16_t num_regs;
   uint16_t num_ins;
   if (code_item != NULL) {
@@ -413,7 +412,7 @@
     return;
   } else {
     DCHECK(method->IsNative());
-    num_regs = num_ins = ArtMethod::NumArgRegisters(mh.GetShorty());
+    num_regs = num_ins = ArtMethod::NumArgRegisters(method->GetShorty());
     if (!method->IsStatic()) {
       num_regs++;
       num_ins++;
@@ -431,9 +430,10 @@
     shadow_frame->SetVRegReference(cur_reg, receiver);
     ++cur_reg;
   }
-  const char* shorty = mh.GetShorty();
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
   for (size_t shorty_pos = 0, arg_pos = 0; cur_reg < num_regs; ++shorty_pos, ++arg_pos, cur_reg++) {
-    DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
+    DCHECK_LT(shorty_pos + 1, shorty_len);
     switch (shorty[shorty_pos + 1]) {
       case 'L': {
         Object* o = reinterpret_cast<StackReference<Object>*>(&args[arg_pos])->AsMirrorPtr();
@@ -465,6 +465,8 @@
     }
   }
   if (LIKELY(!method->IsNative())) {
+    StackHandleScope<1> hs(self);
+    MethodHelper mh(hs.NewHandle(method));
     JValue r = Execute(self, mh, code_item, *shadow_frame, JValue());
     if (result != NULL) {
       *result = r;
@@ -488,11 +490,11 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JValue value;
   value.SetJ(ret_val->GetJ());  // Set value to last known result in case the shadow frame chain is empty.
-  MethodHelper mh;
   while (shadow_frame != NULL) {
     self->SetTopOfShadowStack(shadow_frame);
-    mh.ChangeMethod(shadow_frame->GetMethod());
-    const DexFile::CodeItem* code_item = mh.GetCodeItem();
+    StackHandleScope<1> hs(self);
+    MethodHelper mh(hs.NewHandle(shadow_frame->GetMethod()));
+    const DexFile::CodeItem* code_item = mh.GetMethod()->GetCodeItem();
     value = Execute(self, mh, code_item, *shadow_frame, value);
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 19b85e4..c7fb884 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -20,6 +20,471 @@
 namespace art {
 namespace interpreter {
 
+void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame) {
+  ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+}
+
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) {
+  const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
+  const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                              Primitive::FieldSize(field_type));
+  if (UNLIKELY(f == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == nullptr)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
+      return false;
+    }
+  }
+  // Report this field access to instrumentation if needed.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
+    Object* this_object = f->IsStatic() ? nullptr : obj;
+    instrumentation->FieldReadEvent(self, this_object, shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(), f);
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
+      break;
+    case Primitive::kPrimByte:
+      shadow_frame.SetVReg(vregA, f->GetByte(obj));
+      break;
+    case Primitive::kPrimChar:
+      shadow_frame.SetVReg(vregA, f->GetChar(obj));
+      break;
+    case Primitive::kPrimShort:
+      shadow_frame.SetVReg(vregA, f->GetShort(obj));
+      break;
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, f->GetInt(obj));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoFieldGet functions.
+#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check) \
+  template bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, \
+                                                               ShadowFrame& shadow_frame, \
+                                                               const Instruction* inst, \
+                                                               uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
+
+// iget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot);
+
+// sget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
+
+// Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<Primitive::Type field_type>
+bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == nullptr)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
+  MemberOffset field_offset(inst->VRegC_22c());
+  // Report this field access to instrumentation if needed. Since we only have the offset of
+  // the field from the base of the object, we need to look for it first.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
+    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
+                                                        field_offset.Uint32Value());
+    DCHECK(f != nullptr);
+    DCHECK(!f->IsStatic());
+    instrumentation->FieldReadEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(), f);
+  }
+  // Note: iget-x-quick instructions are only for non-volatile fields.
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset)));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset)));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoIGetQuick functions.
+#define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type) \
+  template bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst, \
+                                         uint16_t inst_data)
+
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
+#undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
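DoIGetQuick above dispenses with field resolution entirely: the quickened instruction carries a raw byte offset, and the read goes straight through it. A hedged sketch of offset-based access under that assumption (plain C++, not the ART object model):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct Point { int32_t x; int32_t y; };  // stand-in for a managed object

    // Read a 32-bit field at a raw byte offset from the object base, the
    // way a quickened iget skips the symbolic field lookup.
    int32_t ReadField32(const void* obj, size_t offset) {
      int32_t value;
      std::memcpy(&value, static_cast<const char*>(obj) + offset, sizeof(value));
      return value;
    }

    // Usage: ReadField32(&p, offsetof(Point, y)) yields p.y.

Note that the instrumentation path above still has to run FindInstanceFieldWithOffset when field-read listeners are installed, precisely because the offset is all the quickened instruction retains.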
+
+template<Primitive::Type field_type>
+static JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  JValue field_value;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimByte:
+      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimChar:
+      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimShort:
+      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimInt:
+      field_value.SetI(shadow_frame.GetVReg(vreg));
+      break;
+    case Primitive::kPrimLong:
+      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
+      break;
+    case Primitive::kPrimNot:
+      field_value.SetL(shadow_frame.GetVRegReference(vreg));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      break;
+  }
+  return field_value;
+}
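GetFieldValue narrows or widens the vreg into the correctly typed slot of a JValue. A minimal sketch of such a primitive union (an assumption for illustration: ART's real JValue also carries an object reference slot, omitted here):

    #include <cstdint>

    union PrimValue {
      uint8_t z;   // boolean
      int8_t b;    // byte
      uint16_t c;  // char
      int16_t s;   // short
      int32_t i;   // int
      int64_t j;   // long
    };

    // Narrowing a 32-bit vreg into the byte slot, as SetB() does above.
    inline PrimValue MakeByte(int32_t vreg) {
      PrimValue v;
      v.j = 0;                          // zero the widest member first
      v.b = static_cast<int8_t>(vreg);  // then store the narrow value
      return v;
    }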
+
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
+bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) {
+  bool do_assignability_check = do_access_check;
+  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
+  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                              Primitive::FieldSize(field_type));
+  if (UNLIKELY(f == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == nullptr)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
+                                              f, false);
+      return false;
+    }
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  // Report this field access to instrumentation if needed. Since we only have the offset of
+  // the field from the base of the object, we need to look for it first.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
+    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
+    Object* this_object = f->IsStatic() ? nullptr : obj;
+    instrumentation->FieldWriteEvent(self, this_object, shadow_frame.GetMethod(),
+                                     shadow_frame.GetDexPC(), f, field_value);
+  }
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimByte:
+      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimChar:
+      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimShort:
+      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimInt:
+      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimLong:
+      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
+      break;
+    case Primitive::kPrimNot: {
+      Object* reg = shadow_frame.GetVRegReference(vregA);
+      if (do_assignability_check && reg != nullptr) {
+        // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
+        // object in the destructor.
+        Class* field_class;
+        {
+          StackHandleScope<3> hs(self);
+          HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+          HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
+          HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+          FieldHelper fh(h_f);
+          field_class = fh.GetType();
+        }
+        if (!reg->VerifierInstanceOf(field_class)) {
+          // This should never happen.
+          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                   "Ljava/lang/VirtualMachineError;",
+                                   "Put '%s' that is not instance of field '%s' in '%s'",
+                                   reg->GetClass()->GetDescriptor().c_str(),
+                                   field_class->GetDescriptor().c_str(),
+                                   f->GetDeclaringClass()->GetDescriptor().c_str());
+          return false;
+        }
+      }
+      f->SetObj<transaction_active>(obj, reg);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
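The kPrimNot arm above wraps f, reg, and obj in HandleWrappers before calling FieldHelper::GetType, because type resolution can trigger a moving collection, and the wrappers' destructors write the relocated pointers back into the locals. A sketch of that restore-on-destruction idea (illustrative only; ART's real handles live in a StackHandleScope that the GC scans as roots):

    // On destruction, copy the (possibly updated) pointer back into the
    // caller's local variable.
    template <typename T>
    class PointerRestorer {
     public:
      explicit PointerRestorer(T** slot) : slot_(slot), current_(*slot) {}
      ~PointerRestorer() { *slot_ = current_; }
      T*& Ref() { return current_; }  // the location a collector would update
     private:
      T** const slot_;
      T* current_;
    };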
+
+// Explicitly instantiate all DoFieldPut functions.
+#define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active) \
+  template bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, \
+      const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, false);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, false);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, true);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, true);
+
+// iput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot);
+
+// sput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
+
+template<Primitive::Type field_type, bool transaction_active>
+bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == nullptr)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
+  MemberOffset field_offset(inst->VRegC_22c());
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  // Report this field modification to instrumentation if needed. Since we only have the offset of
+  // the field from the base of the object, we need to look for it first.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
+    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
+                                                        field_offset.Uint32Value());
+    DCHECK(f != nullptr);
+    DCHECK(!f->IsStatic());
+    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
+    instrumentation->FieldWriteEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
+                                     shadow_frame.GetDexPC(), f, field_value);
+  }
+  // Note: iput-x-quick instructions are only for non-volatile fields.
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      obj->SetField32<transaction_active>(field_offset, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimLong:
+      obj->SetField64<transaction_active>(field_offset, shadow_frame.GetVRegLong(vregA));
+      break;
+    case Primitive::kPrimNot:
+      obj->SetFieldObject<transaction_active>(field_offset, shadow_frame.GetVRegReference(vregA));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoIPutQuick functions.
+#define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active) \
+  template bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
+                                                              const Instruction* inst, \
+                                                              uint16_t inst_data)
+
+#define EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(_field_type)   \
+  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, false);     \
+  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, true);
+
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt);    // iput-quick.
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong);   // iput-wide-quick.
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot);    // iput-object-quick.
+#undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
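Carrying transaction_active as a bool template parameter stamps out a transactional and a plain copy of every writer, so the common non-transactional path contains no runtime branch. A hedged sketch of that compile-time flag (the undo-log shape here is invented for illustration; ART's transaction machinery records more than this):

    #include <vector>

    std::vector<int*> gUndoLog;  // stand-in for a transaction log

    template <bool kTransactionActive>
    void StoreInt(int* field, int value) {
      if (kTransactionActive) {     // constant-folded per instantiation
        gUndoLog.push_back(field);  // log the written-to location
      }
      *field = value;
    }

    // Mirror of the instantiation pattern used above.
    template void StoreInt<false>(int*, int);
    template void StoreInt<true>(int*, int);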
+
+/**
+ * Finds the location where this exception will be caught. We search until we reach either the top
+ * frame or a native frame, in which case the exception is considered uncaught.
+ */
+class CatchLocationFinder : public StackVisitor {
+ public:
+  explicit CatchLocationFinder(Thread* self, Handle<mirror::Throwable>* exception)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    : StackVisitor(self, nullptr), self_(self), handle_scope_(self), exception_(exception),
+      catch_method_(handle_scope_.NewHandle<mirror::ArtMethod>(nullptr)),
+      catch_dex_pc_(DexFile::kDexNoIndex), clear_exception_(false) {
+  }
+
+  bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* method = GetMethod();
+    if (method == nullptr) {
+      return true;
+    }
+    if (method->IsRuntimeMethod()) {
+      // Ignore callee save method.
+      DCHECK(method->IsCalleeSaveMethod());
+      return true;
+    }
+    if (method->IsNative()) {
+      return false;  // End stack walk.
+    }
+    DCHECK(!method->IsNative());
+    uint32_t dex_pc = GetDexPc();
+    if (dex_pc != DexFile::kDexNoIndex) {
+      uint32_t found_dex_pc;
+      {
+        StackHandleScope<3> hs(self_);
+        Handle<mirror::Class> exception_class(hs.NewHandle((*exception_)->GetClass()));
+        Handle<mirror::ArtMethod> h_method(hs.NewHandle(method));
+        found_dex_pc = mirror::ArtMethod::FindCatchBlock(h_method, exception_class, dex_pc,
+                                                         &clear_exception_);
+      }
+      if (found_dex_pc != DexFile::kDexNoIndex) {
+        catch_method_.Assign(method);
+        catch_dex_pc_ = found_dex_pc;
+        return false;  // End stack walk.
+      }
+    }
+    return true;  // Continue stack walk.
+  }
+
+  ArtMethod* GetCatchMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return catch_method_.Get();
+  }
+
+  uint32_t GetCatchDexPc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return catch_dex_pc_;
+  }
+
+  bool NeedClearException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return clear_exception_;
+  }
+
+ private:
+  Thread* const self_;
+  StackHandleScope<1> handle_scope_;
+  Handle<mirror::Throwable>* exception_;
+  Handle<mirror::ArtMethod> catch_method_;
+  uint32_t catch_dex_pc_;
+  bool clear_exception_;
+
+  DISALLOW_COPY_AND_ASSIGN(CatchLocationFinder);
+};
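CatchLocationFinder relies on the StackVisitor protocol: WalkStack invokes VisitFrame once per frame, and returning false ends the walk, which is how both a native frame and a located handler stop the search (the concrete usage follows just below). A tiny sketch of that contract (not the real StackVisitor API):

    struct Frame { const char* method; Frame* caller; };

    // Visit() returns true to continue into the caller's frame, false to stop.
    template <typename Visitor>
    void WalkFrames(Frame* top, Visitor&& visit) {
      for (Frame* f = top; f != nullptr; f = f->caller) {
        if (!visit(f)) {
          return;  // found a handler, or hit a frame we cannot search
        }
      }
    }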
+
+uint32_t FindNextInstructionFollowingException(Thread* self,
+                                               ShadowFrame& shadow_frame,
+                                               uint32_t dex_pc,
+                                               const instrumentation::Instrumentation* instrumentation) {
+  self->VerifyStack();
+  ThrowLocation throw_location;
+  StackHandleScope<3> hs(self);
+  Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException(&throw_location)));
+  if (!self->IsExceptionReportedToInstrumentation() && instrumentation->HasExceptionCaughtListeners()) {
+    CatchLocationFinder clf(self, &exception);
+    clf.WalkStack(false);
+    instrumentation->ExceptionCaughtEvent(self, throw_location, clf.GetCatchMethod(),
+                                          clf.GetCatchDexPc(), exception.Get());
+    self->SetExceptionReportedToInstrumentation(true);
+  }
+  bool clear_exception = false;
+  uint32_t found_dex_pc;
+  {
+    Handle<mirror::Class> exception_class(hs.NewHandle(exception->GetClass()));
+    Handle<mirror::ArtMethod> h_method(hs.NewHandle(shadow_frame.GetMethod()));
+    found_dex_pc = mirror::ArtMethod::FindCatchBlock(h_method, exception_class, dex_pc,
+                                                     &clear_exception);
+  }
+  if (found_dex_pc == DexFile::kDexNoIndex) {
+    instrumentation->MethodUnwindEvent(self, shadow_frame.GetThisObject(),
+                                       shadow_frame.GetMethod(), dex_pc);
+  } else {
+    if (self->IsExceptionReportedToInstrumentation()) {
+      instrumentation->MethodUnwindEvent(self, shadow_frame.GetThisObject(),
+                                         shadow_frame.GetMethod(), dex_pc);
+    }
+    if (clear_exception) {
+      self->ClearException();
+    }
+  }
+  return found_dex_pc;
+}
+
+void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
+  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(mh.GetMethod()->GetDexFile());
+  exit(0);  // Unreachable, keep GCC happy.
+}
+
 static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
                                    const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
                                    JValue* result, size_t arg_offset)
@@ -54,8 +519,7 @@
 bool DoCall(ArtMethod* method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Compute method information.
-  MethodHelper mh(method);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
   const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
   uint16_t num_regs;
   if (LIKELY(code_item != NULL)) {
@@ -73,6 +537,8 @@
 
   // Initialize new shadow frame.
   const size_t first_dest_reg = num_regs - num_ins;
+  StackHandleScope<1> hs(self);
+  MethodHelper mh(hs.NewHandle(method));
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -82,8 +548,9 @@
 
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
-    const DexFile::TypeList* params = mh.GetParameterTypeList();
-    const char* shorty = mh.GetShorty();
+    const DexFile::TypeList* params = method->GetParameterTypeList();
+    uint32_t shorty_len = 0;
+    const char* shorty = method->GetShorty(&shorty_len);
 
     // TODO: find a cleaner way to separate non-range and range information without duplicating code.
     uint32_t arg[5];  // only used in invoke-XXX.
@@ -98,13 +565,13 @@
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
     if (!method->IsStatic()) {
-      size_t receiver_reg = (is_range) ? vregC : arg[0];
+      size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
       ++arg_offset;
     }
     for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
-      DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
+      DCHECK_LT(shorty_pos + 1, shorty_len);
       const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
       switch (shorty[shorty_pos + 1]) {
         case 'L': {
@@ -120,7 +587,7 @@
               self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                        "Ljava/lang/VirtualMachineError;",
                                        "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
-                                       mh.GetName(), shorty_pos,
+                                       method->GetName(), shorty_pos,
                                        o->GetClass()->GetDescriptor().c_str(),
                                        arg_type->GetDescriptor().c_str());
               return false;
@@ -334,12 +801,10 @@
     Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
     String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
     ArtField* found = NULL;
-    FieldHelper fh;
     ObjectArray<ArtField>* fields = klass->GetIFields();
     for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
       ArtField* f = fields->Get(i);
-      fh.ChangeField(f);
-      if (name->Equals(fh.GetName())) {
+      if (name->Equals(f->GetName())) {
         found = f;
       }
     }
@@ -347,8 +812,7 @@
       fields = klass->GetSFields();
       for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
         ArtField* f = fields->Get(i);
-        fh.ChangeField(f);
-        if (name->Equals(fh.GetName())) {
+        if (name->Equals(f->GetName())) {
           found = f;
         }
       }
@@ -371,8 +835,9 @@
     Object* obj = shadow_frame->GetVRegReference(arg_offset);
     result->SetI(obj->IdentityHashCode());
   } else if (name == "java.lang.String java.lang.reflect.ArtMethod.getMethodName(java.lang.reflect.ArtMethod)") {
-    ArtMethod* method = shadow_frame->GetVRegReference(arg_offset)->AsArtMethod();
-    result->SetL(MethodHelper(method).GetNameAsString());
+    StackHandleScope<1> hs(self);
+    MethodHelper mh(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsArtMethod()));
+    result->SetL(mh.GetNameAsString(self));
   } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
              name == "void java.lang.System.arraycopy(char[], int, char[], int, int)") {
     // Special case array copying without initializing System.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 029af8d..5277330 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -38,6 +38,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "mirror/string-inl.h"
 #include "object_utils.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
@@ -60,15 +61,6 @@
 using ::art::mirror::String;
 using ::art::mirror::Throwable;
 
-// b/14882674 Workaround stack overflow issue with clang
-#if defined(__clang__) && defined(__aarch64__)
-#define SOMETIMES_INLINE __attribute__((noinline))
-#define SOMETIMES_INLINE_KEYWORD
-#else
-#define SOMETIMES_INLINE ALWAYS_INLINE
-#define SOMETIMES_INLINE_KEYWORD inline
-#endif
-
 namespace art {
 namespace interpreter {
 
@@ -84,16 +76,8 @@
                               const DexFile::CodeItem* code_item,
                               ShadowFrame& shadow_frame, JValue result_register);
 
-// Workaround for b/14882674 where clang allocates stack for each ThrowLocation created by calls to
-// ShadowFrame::GetCurrentLocationForThrow(). Moving the call here prevents from doing such
-// allocation in the interpreter itself.
-static inline void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
-
-static inline void ThrowNullPointerExceptionFromInterpreter(
-    const ShadowFrame& shadow_frame) {
-  ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-}
+void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
   ref->MonitorEnter(self);
@@ -174,260 +158,28 @@
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static SOMETIMES_INLINE_KEYWORD bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                                                const Instruction* inst, uint16_t inst_data) {
-  const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
-  const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::FieldSize(field_type));
-  if (UNLIKELY(f == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
-      return false;
-    }
-  }
-  // Report this field access to instrumentation if needed.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
-    Object* this_object = f->IsStatic() ? nullptr : obj;
-    instrumentation->FieldReadEvent(self, this_object, shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(), f);
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
-      break;
-    case Primitive::kPrimByte:
-      shadow_frame.SetVReg(vregA, f->GetByte(obj));
-      break;
-    case Primitive::kPrimChar:
-      shadow_frame.SetVReg(vregA, f->GetChar(obj));
-      break;
-    case Primitive::kPrimShort:
-      shadow_frame.SetVReg(vregA, f->GetShort(obj));
-      break;
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, f->GetInt(obj));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type>
-static SOMETIMES_INLINE_KEYWORD bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-  if (UNLIKELY(obj == nullptr)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precised exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  // Report this field access to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
-    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
-                                                        field_offset.Uint32Value());
-    DCHECK(f != nullptr);
-    DCHECK(!f->IsStatic());
-    instrumentation->FieldReadEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(), f);
-  }
-  // Note: iget-x-quick instructions are only for non-volatile fields.
-  const uint32_t vregA = inst->VRegA_22c(inst_data);
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset)));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset)));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
-template<Primitive::Type field_type>
-static inline JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  JValue field_value;
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimByte:
-      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimChar:
-      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimShort:
-      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimInt:
-      field_value.SetI(shadow_frame.GetVReg(vreg));
-      break;
-    case Primitive::kPrimLong:
-      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
-      break;
-    case Primitive::kPrimNot:
-      field_value.SetL(shadow_frame.GetVRegReference(vreg));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-      break;
-  }
-  return field_value;
-}
+bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iput-XXX and sput-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check, bool transaction_active>
-static SOMETIMES_INLINE_KEYWORD bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                                                const Instruction* inst, uint16_t inst_data) {
-  bool do_assignability_check = do_access_check;
-  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
-  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::FieldSize(field_type));
-  if (UNLIKELY(f == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
-                                              f, false);
-      return false;
-    }
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
-  // Report this field access to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    Object* this_object = f->IsStatic() ? nullptr : obj;
-    instrumentation->FieldWriteEvent(self, this_object, shadow_frame.GetMethod(),
-                                     shadow_frame.GetDexPC(), f, field_value);
-  }
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimByte:
-      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimChar:
-      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimShort:
-      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimInt:
-      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimLong:
-      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
-      break;
-    case Primitive::kPrimNot: {
-      Object* reg = shadow_frame.GetVRegReference(vregA);
-      if (do_assignability_check && reg != nullptr) {
-        // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
-        // object in the destructor.
-        StackHandleScope<1> hs(self);
-        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(&obj));
-        Class* field_class = FieldHelper(f).GetType();
-        if (!reg->VerifierInstanceOf(field_class)) {
-          // This should never happen.
-          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                   "Ljava/lang/VirtualMachineError;",
-                                   "Put '%s' that is not instance of field '%s' in '%s'",
-                                   reg->GetClass()->GetDescriptor().c_str(),
-                                   field_class->GetDescriptor().c_str(),
-                                   f->GetDeclaringClass()->GetDescriptor().c_str());
-          return false;
-        }
-      }
-      f->SetObj<transaction_active>(obj, reg);
-      break;
-    }
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
+bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type, bool transaction_active>
-static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame,
-                                                 const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-  if (UNLIKELY(obj == nullptr)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precised exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  const uint32_t vregA = inst->VRegA_22c(inst_data);
-  // Report this field modification to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
-    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
-                                                        field_offset.Uint32Value());
-    DCHECK(f != nullptr);
-    DCHECK(!f->IsStatic());
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    instrumentation->FieldWriteEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                     shadow_frame.GetDexPC(), f, field_value);
-  }
-  // Note: iput-x-quick instructions are only for non-volatile fields.
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      obj->SetField32<transaction_active>(field_offset, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimLong:
-      obj->SetField64<transaction_active>(field_offset, shadow_frame.GetVRegLong(vregA));
-      break;
-    case Primitive::kPrimNot:
-      obj->SetFieldObject<transaction_active>(field_offset, shadow_frame.GetVRegReference(vregA));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 
 // Handles string resolution for const-string and const-string-jumbo instructions. Also ensures the
 // java.lang.String class is initialized.
@@ -582,49 +334,14 @@
   return 3;
 }
 
-static inline uint32_t FindNextInstructionFollowingException(Thread* self,
-                                                             ShadowFrame& shadow_frame,
-                                                             uint32_t dex_pc,
-                                                             mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation)
-SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
+uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
+    uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline uint32_t FindNextInstructionFollowingException(Thread* self,
-                                                             ShadowFrame& shadow_frame,
-                                                             uint32_t dex_pc,
-                                                             mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation) {
-  self->VerifyStack();
-  ThrowLocation throw_location;
-  mirror::Throwable* exception = self->GetException(&throw_location);
-  bool clear_exception = false;
-  StackHandleScope<3> hs(self);
-  Handle<mirror::Class> exception_class(hs.NewHandle(exception->GetClass()));
-  uint32_t found_dex_pc = shadow_frame.GetMethod()->FindCatchBlock(exception_class, dex_pc,
-                                                                   &clear_exception);
-  if (found_dex_pc == DexFile::kDexNoIndex) {
-    instrumentation->MethodUnwindEvent(self, this_object,
-                                       shadow_frame.GetMethod(), dex_pc);
-  } else {
-    instrumentation->ExceptionCaughtEvent(self, throw_location,
-                                          shadow_frame.GetMethod(),
-                                          found_dex_pc, exception);
-    if (clear_exception) {
-      self->ClearException();
-    }
-  }
-  return found_dex_pc;
-}
-
-static inline void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
   __attribute__((cold, noreturn))
   SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
-  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(&mh.GetDexFile());
-  exit(0);  // Unreachable, keep GCC happy.
-}
-
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc, MethodHelper& mh)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -634,7 +351,7 @@
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
         << StringPrintf("\n0x%x: ", dex_pc)
-        << inst->DumpString(&mh.GetDexFile()) << "\n";
+        << inst->DumpString(mh.GetMethod()->GetDexFile()) << "\n";
     for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
       uint32_t raw_value = shadow_frame.GetVReg(i);
       Object* ref_value = shadow_frame.GetVRegReference(i);
@@ -659,7 +376,7 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                    \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                     \
   bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
@@ -678,73 +395,9 @@
 #undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL
 
-// Explicitly instantiate all DoFieldGet functions.
-#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check)                \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                        \
-  bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, ShadowFrame& shadow_frame, \
-                                                      const Instruction* inst, uint16_t inst_data)
-
-#define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
-
-// iget-XXX
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot);
-
-// sget-XXX
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot);
-
-#undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
-
-// Explicitly instantiate all DoFieldPut functions.
-#define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                                                   \
-  bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, const ShadowFrame& shadow_frame, \
-                                                                           const Instruction* inst, uint16_t inst_data)
-
-#define EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, false);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, false);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, true);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, true);
-
-// iput-XXX
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot);
-
-// sput-XXX
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot);
-
-#undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
-
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                               \
   bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,      \
                                        const Instruction* inst, uint16_t inst_data,  \
                                        JValue* result)
@@ -753,33 +406,6 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
-// Explicitly instantiate all DoIGetQuick functions.
-#define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type)                            \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
-  bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst,  \
-                                uint16_t inst_data)
-
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
-#undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
-
-// Explicitly instantiate all DoIPutQuick functions.
-#define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active)        \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE               \
-  bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
-                                                     const Instruction* inst,         \
-                                                     uint16_t inst_data)
-
-#define EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(_field_type)   \
-  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, false);     \
-  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, true);
-
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
-#undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 99153c8..cb4868c 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -247,7 +247,7 @@
       // If access checks are required then the dex-to-dex compiler and analysis of
       // whether the class has final fields hasn't been performed. Conservatively
       // perform the memory barrier now.
-      QuasiAtomic::MembarStoreLoad();
+      QuasiAtomic::ThreadFenceForConstructor();
     }
     if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
@@ -266,7 +266,7 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) {
-    QuasiAtomic::MembarStoreLoad();
+    QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
@@ -328,11 +328,13 @@
     if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
     }
-    Object* obj_result = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
-    result.SetJ(0);
-    result.SetL(obj_result);
+    const uint8_t vreg_index = inst->VRegA_11x(inst_data);
+    Object* obj_result = shadow_frame.GetVRegReference(vreg_index);
     if (do_assignability_check && obj_result != NULL) {
-      Class* return_type = MethodHelper(shadow_frame.GetMethod()).GetReturnType();
+      StackHandleScope<1> hs(self);
+      MethodHelper mh(hs.NewHandle(shadow_frame.GetMethod()));
+      Class* return_type = mh.GetReturnType();
+      obj_result = shadow_frame.GetVRegReference(vreg_index);  // Re-load since it might have moved.
       if (return_type == NULL) {
         // Return the pending exception.
         HANDLE_PENDING_EXCEPTION();
@@ -347,6 +349,7 @@
         HANDLE_PENDING_EXCEPTION();
       }
     }
+    result.SetL(obj_result);
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -2388,10 +2391,8 @@
       CheckSuspend(self);
       UPDATE_HANDLER_TABLE();
     }
-    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
-                                                                  this_object,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
       return JValue(); /* Handled in caller. */
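Both interpreters now emit QuasiAtomic::ThreadFenceForConstructor instead of the stronger MembarStoreLoad when returning from a constructor that may have written final fields: a constructor fence only has to order the field stores before the object reference is published. A hedged C++11 analogue, assuming a release fence is the right strength:

    #include <atomic>

    struct Box { int payload; };
    std::atomic<Box*> gShared{nullptr};

    void Publish(Box* b) {
      b->payload = 42;                                      // constructor stores
      std::atomic_thread_fence(std::memory_order_release);  // fence before publish
      gShared.store(b, std::memory_order_relaxed);          // publish the reference
    }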
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 3c7880c..bdf2a20 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -25,10 +25,8 @@
     if (UNLIKELY(self->TestAllFlags())) {                                                       \
       CheckSuspend(self);                                                                       \
     }                                                                                           \
-    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                     \
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
                                                                   inst->GetDexPc(insns),        \
-                                                                  this_object,                  \
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       return JValue(); /* Handled in caller. */                                                 \
@@ -175,7 +173,7 @@
           // If access checks are required then the dex-to-dex compiler and analysis of
           // whether the class has final fields hasn't been performed. Conservatively
           // perform the memory barrier now.
-          QuasiAtomic::MembarStoreLoad();
+          QuasiAtomic::ThreadFenceForConstructor();
         }
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
@@ -191,7 +189,7 @@
         return result;
       }
       case Instruction::RETURN_VOID_BARRIER: {
-        QuasiAtomic::MembarStoreLoad();
+        QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
@@ -244,11 +242,14 @@
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
         }
-        Object* obj_result = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
-        result.SetJ(0);
-        result.SetL(obj_result);
+        const size_t ref_idx = inst->VRegA_11x(inst_data);
+        Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != NULL) {
-          Class* return_type = MethodHelper(shadow_frame.GetMethod()).GetReturnType();
+          StackHandleScope<1> hs(self);
+          MethodHelper mhs(hs.NewHandle(shadow_frame.GetMethod()));
+          Class* return_type = mhs.GetReturnType();
+          // Re-load since it might have moved.
+          obj_result = shadow_frame.GetVRegReference(ref_idx);
           if (return_type == NULL) {
             // Return the pending exception.
             HANDLE_PENDING_EXCEPTION();
@@ -263,6 +264,7 @@
             HANDLE_PENDING_EXCEPTION();
           }
         }
+        result.SetL(obj_result);
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
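The RETURN_OBJECT change above re-reads obj_result from the shadow frame after GetReturnType, because resolving the return type can suspend the thread and a moving collector may relocate the object; only the vreg slot, which the GC treats as a root, stays current. A toy model of the pitfall (illustrative, not ART API):

    #include <cstdio>

    struct Obj { int id; };
    Obj heap_a{1}, heap_b{1};
    Obj* roots[1] = {&heap_a};  // the slot a collector updates

    void MaybeMoveObjects() { roots[0] = &heap_b; }  // stands in for a moving GC

    int main() {
      Obj* raw = roots[0];   // raw copy; goes stale if objects move
      MaybeMoveObjects();    // anything that can suspend the thread
      raw = roots[0];        // re-load from the root, as the diff does
      std::printf("%d\n", raw->id);
    }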
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 49dceb2..d637a94 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -31,8 +31,7 @@
 }
 
 ObjectRegistry::ObjectRegistry()
-    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), allow_new_objects_(true),
-      condition_("object registry condition", lock_), next_id_(1) {
+    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), next_id_(1) {
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(mirror::Class* c) {
@@ -44,20 +43,17 @@
 }
 
 JDWP::ObjectId ObjectRegistry::InternalAdd(mirror::Object* o) {
-  if (o == NULL) {
+  if (o == nullptr) {
     return 0;
   }
 
+  // Call IdentityHashCode here to avoid a lock level violation between lock_ and monitor_lock.
+  int32_t identity_hash_code = o->IdentityHashCode();
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), lock_);
-  while (UNLIKELY(!allow_new_objects_)) {
-    condition_.WaitHoldingLocks(soa.Self());
-  }
-  ObjectRegistryEntry* entry;
-  auto it = object_to_entry_.find(o);
-  if (it != object_to_entry_.end()) {
+  ObjectRegistryEntry* entry = nullptr;
+  if (ContainsLocked(soa.Self(), o, identity_hash_code, &entry)) {
     // This object was already in our map.
-    entry = it->second;
     ++entry->reference_count;
   } else {
     entry = new ObjectRegistryEntry;
@@ -65,7 +61,8 @@
     entry->jni_reference = nullptr;
     entry->reference_count = 0;
     entry->id = 0;
-    object_to_entry_.insert(std::make_pair(o, entry));
+    entry->identity_hash_code = identity_hash_code;
+    object_to_entry_.insert(std::make_pair(identity_hash_code, entry));
 
     // This object isn't in the registry yet, so add it.
     JNIEnv* env = soa.Env();
@@ -84,9 +81,31 @@
   return entry->id;
 }
 
-bool ObjectRegistry::Contains(mirror::Object* o) {
-  MutexLock mu(Thread::Current(), lock_);
-  return object_to_entry_.find(o) != object_to_entry_.end();
+bool ObjectRegistry::Contains(mirror::Object* o, ObjectRegistryEntry** out_entry) {
+  if (o == nullptr) {
+    return false;
+  }
+  // Call IdentityHashCode here to avoid a lock level violation between lock_ and monitor_lock.
+  int32_t identity_hash_code = o->IdentityHashCode();
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
+  return ContainsLocked(self, o, identity_hash_code, out_entry);
+}
+
+bool ObjectRegistry::ContainsLocked(Thread* self, mirror::Object* o, int32_t identity_hash_code,
+                                    ObjectRegistryEntry** out_entry) {
+  DCHECK(o != nullptr);
+  for (auto it = object_to_entry_.lower_bound(identity_hash_code), end = object_to_entry_.end();
+       it != end && it->first == identity_hash_code; ++it) {
+    ObjectRegistryEntry* entry = it->second;
+    if (o == self->DecodeJObject(entry->jni_reference)) {
+      if (out_entry != nullptr) {
+        *out_entry = entry;
+      }
+      return true;
+    }
+  }
+  return false;
 }
 
 void ObjectRegistry::Clear() {
@@ -194,47 +213,24 @@
   entry->reference_count -= reference_count;
   if (entry->reference_count <= 0) {
     JNIEnv* env = self->GetJniEnv();
-    mirror::Object* object = self->DecodeJObject(entry->jni_reference);
+    // Erase the object from the maps. Note object may be null if it's
+    // a weak ref and the GC has cleared it.
+    int32_t hash_code = entry->identity_hash_code;
+    for (auto it = object_to_entry_.lower_bound(hash_code), end = object_to_entry_.end();
+         it != end && it->first == hash_code; ++it) {
+      if (entry == it->second) {
+        object_to_entry_.erase(it);
+        break;
+      }
+    }
     if (entry->jni_reference_type == JNIWeakGlobalRefType) {
       env->DeleteWeakGlobalRef(entry->jni_reference);
     } else {
       env->DeleteGlobalRef(entry->jni_reference);
     }
-    object_to_entry_.erase(object);
     id_to_entry_.erase(id);
     delete entry;
   }
 }
 
-void ObjectRegistry::UpdateObjectPointers(IsMarkedCallback* callback, void* arg) {
-  MutexLock mu(Thread::Current(), lock_);
-  if (object_to_entry_.empty()) {
-    return;
-  }
-  std::map<mirror::Object*, ObjectRegistryEntry*> new_object_to_entry;
-  for (auto& pair : object_to_entry_) {
-    mirror::Object* new_obj;
-    if (pair.first != nullptr) {
-      new_obj = callback(pair.first, arg);
-      if (new_obj != nullptr) {
-        new_object_to_entry.insert(std::make_pair(new_obj, pair.second));
-      }
-    }
-  }
-  object_to_entry_ = new_object_to_entry;
-}
-
-void ObjectRegistry::AllowNewObjects() {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  allow_new_objects_ = true;
-  condition_.Broadcast(self);
-}
-
-void ObjectRegistry::DisallowNewObjects() {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  allow_new_objects_ = false;
-}
-
 }  // namespace art
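
Note on the change above: the registry's key changes from the raw mirror::Object* to the object's identity hash code, held in a multimap. A moving collector can relocate an object (invalidating address keys), but its identity hash is stable for its lifetime; hash collisions are disambiguated by decoding each candidate entry's JNI reference and comparing pointers, as ContainsLocked does. This is also why the allow/disallow machinery can go away: the map no longer stores raw pointers the GC must sweep. A minimal standalone sketch of the collision-tolerant lookup, with invented stand-in types:

    #include <cstdint>
    #include <map>

    // Invented stand-ins for the runtime types used in the patch above.
    struct Object { int32_t hash; int32_t IdentityHashCode() const { return hash; } };
    struct Entry { Object* resolved; };  // In ART: the entry's decoded JNI reference.

    // Distinct objects may share an identity hash code, so walk every entry in
    // the bucket and compare resolved object pointers, as ContainsLocked does.
    Entry* Find(const std::multimap<int32_t, Entry*>& entries, Object* o) {
      const int32_t hash = o->IdentityHashCode();
      for (auto it = entries.lower_bound(hash);
           it != entries.end() && it->first == hash; ++it) {
        if (it->second->resolved == o) {
          return it->second;
        }
      }
      return nullptr;
    }
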
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 3c6cb15..e1a6875 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -43,6 +43,10 @@
 
   // The corresponding id, so we only need one map lookup in Add.
   JDWP::ObjectId id;
+
+  // The identity hash code of the object. This is the same as the key
+  // for object_to_entry_. Store this for DisposeObject().
+  int32_t identity_hash_code;
 };
 std::ostream& operator<<(std::ostream& os, const ObjectRegistryEntry& rhs);
 
@@ -55,7 +59,8 @@
  public:
   ObjectRegistry();
 
-  JDWP::ObjectId Add(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  JDWP::ObjectId Add(mirror::Object* o)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_);
   JDWP::RefTypeId AddRefType(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<typename T> T Get(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -65,7 +70,9 @@
     return reinterpret_cast<T>(InternalGet(id));
   }
 
-  bool Contains(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Contains(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return Contains(o, nullptr);
+  }
 
   void Clear() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -84,26 +91,20 @@
   // Avoid using this and use standard Get when possible.
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Visit, objects are treated as system weaks.
-  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // We have allow / disallow functionality since we use system weak sweeping logic to update moved
-  // objects inside of the object_to_entry_ map.
-  void AllowNewObjects() LOCKS_EXCLUDED(lock_);
-  void DisallowNewObjects() LOCKS_EXCLUDED(lock_);
-
  private:
-  JDWP::ObjectId InternalAdd(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  JDWP::ObjectId InternalAdd(mirror::Object* o)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_);
   mirror::Object* InternalGet(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Demote(ObjectRegistryEntry& entry) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, lock_);
   void Promote(ObjectRegistryEntry& entry) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, lock_);
+  bool Contains(mirror::Object* o, ObjectRegistryEntry** out_entry)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool ContainsLocked(Thread* self, mirror::Object* o, int32_t identity_hash_code,
+                      ObjectRegistryEntry** out_entry)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  bool allow_new_objects_ GUARDED_BY(lock_);
-  ConditionVariable condition_ GUARDED_BY(lock_);
-
-  std::map<mirror::Object*, ObjectRegistryEntry*> object_to_entry_ GUARDED_BY(lock_);
+  std::multimap<int32_t, ObjectRegistryEntry*> object_to_entry_ GUARDED_BY(lock_);
   SafeMap<JDWP::ObjectId, ObjectRegistryEntry*> id_to_entry_ GUARDED_BY(lock_);
 
   size_t next_id_ GUARDED_BY(lock_);
diff --git a/runtime/jni_internal-inl.h b/runtime/jni_internal-inl.h
new file mode 100644
index 0000000..6cf9a61
--- /dev/null
+++ b/runtime/jni_internal-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JNI_INTERNAL_INL_H_
+#define ART_RUNTIME_JNI_INTERNAL_INL_H_
+
+#include "jni_internal.h"
+
+#include "utils.h"
+
+namespace art {
+
+template<typename T>
+inline T JNIEnvExt::AddLocalReference(mirror::Object* obj) {
+  IndirectRef ref = locals.Add(local_ref_cookie, obj);
+
+  // TODO: fix this to understand PushLocalFrame, so we can turn it on.
+  if (false) {
+    if (check_jni) {
+      size_t entry_count = locals.Capacity();
+      if (entry_count > 16) {
+        locals.Dump(LOG(WARNING) << "Warning: more than 16 JNI local references: "
+            << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n");
+        // TODO: LOG(FATAL) in a later release?
+      }
+    }
+  }
+
+  return reinterpret_cast<T>(ref);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JNI_INTERNAL_INL_H_
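
This new header follows the common -inl.h split: the template definition moves out of jni_internal.h so that header can drop its heavy runtime.h include (see the jni_internal.h hunk below), and only translation units that actually instantiate AddLocalReference pay for the definition. A minimal sketch of the pattern, using hypothetical names:

    // widget.h -- declaration only; cheap for everyone to include.
    template <typename T>
    T Convert(int raw);

    // widget-inl.h -- includes widget.h plus any heavy headers the body needs;
    // only translation units that instantiate Convert pull this in.
    template <typename T>
    T Convert(int raw) {
      return static_cast<T>(raw);
    }
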
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 19ee1ff..66406bf 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -682,6 +682,7 @@
     auto old_throw_method(hs.NewHandle<mirror::ArtMethod>(nullptr));
     auto old_exception(hs.NewHandle<mirror::Throwable>(nullptr));
     uint32_t old_throw_dex_pc;
+    bool old_is_exception_reported;
     {
       ThrowLocation old_throw_location;
       mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
@@ -689,6 +690,7 @@
       old_throw_method.Assign(old_throw_location.GetMethod());
       old_exception.Assign(old_exception_obj);
       old_throw_dex_pc = old_throw_location.GetDexPc();
+      old_is_exception_reported = soa.Self()->IsExceptionReportedToInstrumentation();
       soa.Self()->ClearException();
     }
     ScopedLocalRef<jthrowable> exception(env,
@@ -710,6 +712,7 @@
                                          old_throw_dex_pc);
 
     soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
+    soa.Self()->SetExceptionReportedToInstrumentation(old_is_exception_reported);
   }
 
   static jthrowable ExceptionOccurred(JNIEnv* env) {
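
The Throw path must stash the pending exception before running code that requires none, then put it back; the change above additionally saves and restores the thread's "exception reported to instrumentation" flag so the restored exception is not reported to debugger instrumentation a second time. A minimal sketch of the save/restore shape, using a simplified hypothetical thread API:

    // Simplified, hypothetical thread state mirroring the pattern in the hunk.
    struct Thread {
      void* exception = nullptr;
      bool reported_to_instrumentation = false;
    };

    void WithExceptionSetAside(Thread* self, void (*work)(Thread*)) {
      void* old_exception = self->exception;
      bool old_reported = self->reported_to_instrumentation;  // Save the flag too.
      self->exception = nullptr;        // Work below requires no pending exception.
      work(self);
      self->exception = old_exception;  // Restore both pieces of state together,
      self->reported_to_instrumentation = old_reported;  // avoiding a double report.
    }
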
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 4072da4..abb71b7 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -24,7 +24,6 @@
 #include "indirect_reference_table.h"
 #include "object_callbacks.h"
 #include "reference_table.h"
-#include "runtime.h"
 
 #include <iosfwd>
 #include <string>
@@ -45,6 +44,7 @@
 union JValue;
 class Libraries;
 class ParsedOptions;
+class Runtime;
 class ScopedObjectAccess;
 template<class T> class Handle;
 class Thread;
@@ -216,25 +216,6 @@
   DISALLOW_COPY_AND_ASSIGN(ScopedJniEnvLocalRefState);
 };
 
-template<typename T>
-inline T JNIEnvExt::AddLocalReference(mirror::Object* obj) {
-  IndirectRef ref = locals.Add(local_ref_cookie, obj);
-
-  // TODO: fix this to understand PushLocalFrame, so we can turn it on.
-  if (false) {
-    if (check_jni) {
-      size_t entry_count = locals.Capacity();
-      if (entry_count > 16) {
-        locals.Dump(LOG(WARNING) << "Warning: more than 16 JNI local references: "
-            << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n");
-        // TODO: LOG(FATAL) in a later release?
-      }
-    }
-  }
-
-  return reinterpret_cast<T>(ref);
-}
-
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs);
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 22a61a2..81a8623 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -473,6 +473,18 @@
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot);
 }
 
+void MemMap::MadviseDontNeedAndZero() {
+  if (base_begin_ != nullptr || base_size_ != 0) {
+    if (!kMadviseZeroes) {
+      memset(base_begin_, 0, base_size_);
+    }
+    int result = madvise(base_begin_, base_size_, MADV_DONTNEED);
+    if (result == -1) {
+      PLOG(WARNING) << "madvise failed";
+    }
+  }
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
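
MadviseDontNeedAndZero relies on the Linux guarantee that private anonymous pages read back as zeroes after MADV_DONTNEED; on platforms where kMadviseZeroes is false (defined in the mem_map.h hunk below) it zeroes explicitly first. A minimal Linux-only sketch of that zeroing contract, assuming an anonymous private mapping:

    #include <cstring>
    #include <sys/mman.h>

    // Linux-only sketch: after MADV_DONTNEED, private anonymous pages are
    // refaulted zero-filled, so no explicit memset is required there.
    bool DontNeedLeavesZeroes(size_t size) {
      void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (mem == MAP_FAILED) {
        return false;
      }
      memset(mem, 0xAB, size);            // Dirty the pages.
      madvise(mem, size, MADV_DONTNEED);  // Discard; next read faults in zeroes.
      bool zeroed = static_cast<unsigned char*>(mem)[0] == 0;
      munmap(mem, size);
      return zeroed;
    }
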
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index dc5909b..e42251c 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -30,6 +30,12 @@
 
 namespace art {
 
+#ifdef __linux__
+static constexpr bool kMadviseZeroes = true;
+#else
+static constexpr bool kMadviseZeroes = false;
+#endif
+
 // Used to keep track of mmap segments.
 //
 // On 64b systems not supporting MAP_32BIT, the implementation of MemMap will do a linear scan
@@ -77,6 +83,8 @@
 
   bool Protect(int prot);
 
+  void MadviseDontNeedAndZero();
+
   int GetProtect() const {
     return prot_;
   }
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index fe76c92..69f618c 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -198,16 +198,17 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  uintptr_t start_addr = ART_BASE_ADDRESS + 0x1000000;
   std::string error_msg;
   std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
-                                             reinterpret_cast<byte*>(0x71000000),
+                                             reinterpret_cast<byte*>(start_addr),
                                              0x21000000,
                                              PROT_READ | PROT_WRITE,
                                              true,
                                              &error_msg));
   ASSERT_TRUE(map.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 0x71000000U);
+  ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), start_addr);
 }
 
 TEST_F(MemMapTest, MapAnonymousOverflow) {
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index ad24d0a..686fded 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -116,60 +116,52 @@
 }
 
 inline bool ArtField::GetBoolean(Object* object) {
-  DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimBoolean, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetBoolean(Object* object, bool z) {
-  DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimBoolean, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, z);
 }
 
 inline int8_t ArtField::GetByte(Object* object) {
-  DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimByte, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetByte(Object* object, int8_t b) {
-  DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimByte, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, b);
 }
 
 inline uint16_t ArtField::GetChar(Object* object) {
-  DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimChar, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetChar(Object* object, uint16_t c) {
-  DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimChar, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, c);
 }
 
 inline int16_t ArtField::GetShort(Object* object) {
-  DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimShort, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetShort(Object* object, int16_t s) {
-  DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimShort, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, s);
 }
 
 inline int32_t ArtField::GetInt(Object* object) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
   }
   return Get32(object);
@@ -178,7 +170,7 @@
 template<bool kTransactionActive>
 inline void ArtField::SetInt(Object* object, int32_t i) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
   }
   Set32<kTransactionActive>(object, i);
@@ -186,7 +178,7 @@
 
 inline int64_t ArtField::GetLong(Object* object) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
   }
   return Get64(object);
@@ -195,15 +187,14 @@
 template<bool kTransactionActive>
 inline void ArtField::SetLong(Object* object, int64_t j) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
   }
   Set64<kTransactionActive>(object, j);
 }
 
 inline float ArtField::GetFloat(Object* object) {
-  DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetI(Get32(object));
   return bits.GetF();
@@ -211,16 +202,14 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetFloat(Object* object, float f) {
-  DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetF(f);
   Set32<kTransactionActive>(object, bits.GetI());
 }
 
 inline double ArtField::GetDouble(Object* object) {
-  DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetJ(Get64(object));
   return bits.GetD();
@@ -228,26 +217,68 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetDouble(Object* object, double d) {
-  DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetD(d);
   Set64<kTransactionActive>(object, bits.GetJ());
 }
 
 inline Object* ArtField::GetObject(Object* object) {
-  DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   return GetObj(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetObject(Object* object, Object* l) {
-  DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   SetObj<kTransactionActive>(object, l);
 }
 
+inline const char* ArtField::GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uint32_t field_index = GetDexFieldIndex();
+  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+    DCHECK(IsStatic());
+    DCHECK_LT(field_index, 2U);
+    return field_index == 0 ? "interfaces" : "throws";
+  }
+  const DexFile* dex_file = GetDexFile();
+  return dex_file->GetFieldName(dex_file->GetFieldId(field_index));
+}
+
+inline const char* ArtField::GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uint32_t field_index = GetDexFieldIndex();
+  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+    DCHECK(IsStatic());
+    DCHECK_LT(field_index, 2U);
+    // 0 == Class[] interfaces; 1 == Class[][] throws;
+    return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
+  }
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+  return dex_file->GetFieldTypeDescriptor(field_id);
+}
+
+inline Primitive::Type ArtField::GetTypeAsPrimitiveType()
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return Primitive::GetType(GetTypeDescriptor()[0]);
+}
+
+inline bool ArtField::IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetTypeAsPrimitiveType() != Primitive::kPrimNot;
+}
+
+inline size_t ArtField::FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return Primitive::FieldSize(GetTypeAsPrimitiveType());
+}
+
+inline mirror::DexCache* ArtField::GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetDeclaringClass()->GetDexCache();
+}
+
+inline const DexFile* ArtField::GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetDexCache()->GetDexFile();
+}
+
 }  // namespace mirror
 }  // namespace art
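
These new ArtField accessors read metadata straight from the dex file (with a special case for the two synthetic static fields of proxy classes), replacing FieldHelper at most call sites. GetTypeAsPrimitiveType and FieldSize key off the first character of the type descriptor; a simplified, self-contained sketch of that dispatch (the sizes are illustrative, not ART's exact component sizes):

    #include <cstddef>

    // Simplified dispatch on the leading descriptor character, mirroring
    // Primitive::GetType(GetTypeDescriptor()[0]) in the hunk above.
    size_t FieldSizeFromDescriptor(const char* descriptor) {
      switch (descriptor[0]) {
        case 'Z': case 'B': return 1;              // boolean, byte
        case 'C': case 'S': return 2;              // char, short
        case 'I': case 'F': return 4;              // int, float
        case 'J': case 'D': return 8;              // long, double
        case 'L': case '[': return sizeof(void*);  // reference or array
        default:            return 0;              // malformed descriptor
      }
    }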
 
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index b3b1b71..f2729f6 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -55,7 +55,7 @@
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   if (kIsDebugBuild && Runtime::Current()->IsCompiler() &&
       !Runtime::Current()->UseCompileTimeClassPath()) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     if (type == Primitive::kPrimDouble || type == Primitive::kPrimLong) {
       DCHECK_ALIGNED(num_bytes.Uint32Value(), 8);
     }
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 30cd180..4858613 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -139,6 +139,14 @@
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Primitive::Type GetTypeAsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 5f4619b..8fcacc2 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -295,7 +295,9 @@
   if (UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
     // Generic JNI frame.
     DCHECK(IsNative());
-    uint32_t handle_refs = MethodHelper(this).GetNumberOfReferenceArgsWithoutReceiver() + 1;
+    StackHandleScope<1> hs(Thread::Current());
+    uint32_t handle_refs =
+        MethodHelper(hs.NewHandle(this)).GetNumberOfReferenceArgsWithoutReceiver() + 1;
     size_t scope_size = HandleScope::SizeOf(handle_refs);
     QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
 
@@ -314,10 +316,143 @@
 
 inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
   DCHECK(code_pointer != nullptr);
-  DCHECK(code_pointer == GetQuickOatCodePointer());
+  DCHECK_EQ(code_pointer, GetQuickOatCodePointer());
   return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
 }
 
+inline const DexFile* ArtMethod::GetDexFile() {
+  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetDexCache()->GetDexFile();
+}
+
+inline const char* ArtMethod::GetDeclaringClassDescriptor() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  uint32_t dex_method_idx = method->GetDexMethodIndex();
+  if (UNLIKELY(dex_method_idx == DexFile::kDexNoIndex)) {
+    return "<runtime method>";
+  }
+  const DexFile* dex_file = method->GetDexFile();
+  return dex_file->GetMethodDeclaringClassDescriptor(dex_file->GetMethodId(dex_method_idx));
+}
+
+inline const char* ArtMethod::GetShorty(uint32_t* out_length) {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  const DexFile* dex_file = method->GetDexFile();
+  return dex_file->GetMethodShorty(dex_file->GetMethodId(method->GetDexMethodIndex()), out_length);
+}
+
+inline const Signature ArtMethod::GetSignature() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  uint32_t dex_method_idx = method->GetDexMethodIndex();
+  if (dex_method_idx != DexFile::kDexNoIndex) {
+    const DexFile* dex_file = method->GetDexFile();
+    return dex_file->GetMethodSignature(dex_file->GetMethodId(dex_method_idx));
+  }
+  return Signature::NoSignature();
+}
+
+inline const char* ArtMethod::GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  uint32_t dex_method_idx = method->GetDexMethodIndex();
+  if (LIKELY(dex_method_idx != DexFile::kDexNoIndex)) {
+    const DexFile* dex_file = method->GetDexFile();
+    return dex_file->GetMethodName(dex_file->GetMethodId(dex_method_idx));
+  }
+  Runtime* runtime = Runtime::Current();
+  if (method == runtime->GetResolutionMethod()) {
+    return "<runtime internal resolution method>";
+  } else if (method == runtime->GetImtConflictMethod()) {
+    return "<runtime internal imt conflict method>";
+  } else if (method == runtime->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+    return "<runtime internal callee-save all registers method>";
+  } else if (method == runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)) {
+    return "<runtime internal callee-save reference registers method>";
+  } else if (method == runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs)) {
+    return "<runtime internal callee-save reference and argument registers method>";
+  } else {
+    return "<unknown runtime internal method>";
+  }
+}
+
+inline const DexFile::CodeItem* ArtMethod::GetCodeItem() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  return method->GetDexFile()->GetCodeItem(method->GetCodeItemOffset());
+}
+
+inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx) {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  return method->GetDexCacheResolvedTypes()->Get(type_idx) != nullptr;
+}
+
+inline int32_t ArtMethod::GetLineNumFromDexPC(uint32_t dex_pc) {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  if (dex_pc == DexFile::kDexNoIndex) {
+    return method->IsNative() ? -2 : -1;
+  }
+  return method->GetDexFile()->GetLineNumFromPC(method, dex_pc);
+}
+
+inline const DexFile::ProtoId& ArtMethod::GetPrototype() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  const DexFile* dex_file = method->GetDexFile();
+  return dex_file->GetMethodPrototype(dex_file->GetMethodId(method->GetDexMethodIndex()));
+}
+
+inline const DexFile::TypeList* ArtMethod::GetParameterTypeList() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  const DexFile* dex_file = method->GetDexFile();
+  const DexFile::ProtoId& proto = dex_file->GetMethodPrototype(
+      dex_file->GetMethodId(method->GetDexMethodIndex()));
+  return dex_file->GetProtoParameters(proto);
+}
+
+inline const char* ArtMethod::GetDeclaringClassSourceFile() {
+  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetSourceFile();
+}
+
+inline uint16_t ArtMethod::GetClassDefIndex() {
+  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetDexClassDefIndex();
+}
+
+inline const DexFile::ClassDef& ArtMethod::GetClassDef() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  return method->GetDexFile()->GetClassDef(GetClassDefIndex());
+}
+
+inline const char* ArtMethod::GetReturnTypeDescriptor() {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  const DexFile* dex_file = method->GetDexFile();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(method->GetDexMethodIndex());
+  const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
+  uint16_t return_type_idx = proto_id.return_type_idx_;
+  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(return_type_idx));
+}
+
+inline const char* ArtMethod::GetTypeDescriptorFromTypeIdx(uint16_t type_idx) {
+  mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
+  const DexFile* dex_file = method->GetDexFile();
+  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(type_idx));
+}
+
+inline mirror::ClassLoader* ArtMethod::GetClassLoader() {
+  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetClassLoader();
+}
+
+inline mirror::DexCache* ArtMethod::GetDexCache() {
+  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetDexCache();
+}
+
+inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy() {
+  mirror::Class* klass = GetDeclaringClass();
+  if (LIKELY(!klass->IsProxyClass())) {
+    return this;
+  }
+  mirror::ArtMethod* interface_method = GetDexCacheResolvedMethods()->Get(GetDexMethodIndex());
+  DCHECK(interface_method != nullptr);
+  DCHECK_EQ(interface_method,
+            Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
+  return interface_method;
+}
+
 }  // namespace mirror
 }  // namespace art
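
Nearly all of the new ArtMethod getters above funnel through GetInterfaceMethodIfProxy(), so dex-file metadata is always read from the real interface method rather than from the runtime-generated proxy method, which carries no meaningful dex data of its own. A toy sketch of that canonicalize-then-read pattern, with invented types:

    // Invented types illustrating the canonicalize-then-read pattern.
    struct Method {
      bool is_proxy;
      Method* interface_method;  // Non-null only for proxy methods.
      const char* name;

      Method* Canonical() {      // Mirrors GetInterfaceMethodIfProxy().
        return is_proxy ? interface_method : this;
      }
      const char* GetName() { return Canonical()->name; }
    };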
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index c01fc72..4821e29 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -142,16 +142,16 @@
       CHECK_EQ(result,
                Runtime::Current()->GetClassLinker()->FindMethodForProxy(GetDeclaringClass(), this));
     } else {
-      MethodHelper mh(this);
-      MethodHelper interface_mh;
+      StackHandleScope<2> hs(Thread::Current());
+      MethodHelper mh(hs.NewHandle(this));
+      MethodHelper interface_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
       IfTable* iftable = GetDeclaringClass()->GetIfTable();
       for (size_t i = 0; i < iftable->Count() && result == NULL; i++) {
         Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-          ArtMethod* interface_method = interface->GetVirtualMethod(j);
-          interface_mh.ChangeMethod(interface_method);
+          interface_mh.ChangeMethod(interface->GetVirtualMethod(j));
           if (mh.HasSameNameAndSignature(&interface_mh)) {
-            result = interface_method;
+            result = interface_mh.GetMethod();
             break;
           }
         }
@@ -159,8 +159,10 @@
     }
   }
 #ifndef NDEBUG
-  MethodHelper result_mh(result);
-  DCHECK(result == NULL || MethodHelper(this).HasSameNameAndSignature(&result_mh));
+  StackHandleScope<2> hs(Thread::Current());
+  MethodHelper result_mh(hs.NewHandle(result));
+  MethodHelper this_mh(hs.NewHandle(this));
+  DCHECK(result == NULL || this_mh.HasSameNameAndSignature(&result_mh));
 #endif
   return result;
 }
@@ -229,15 +231,16 @@
   return 0;
 }
 
-uint32_t ArtMethod::FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
-                                   bool* has_no_move_exception) {
-  MethodHelper mh(this);
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+uint32_t ArtMethod::FindCatchBlock(Handle<ArtMethod> h_this, Handle<Class> exception_type,
+                                   uint32_t dex_pc, bool* has_no_move_exception) {
+  MethodHelper mh(h_this);
+  const DexFile::CodeItem* code_item = h_this->GetCodeItem();
   // Set aside the exception while we resolve its type.
   Thread* self = Thread::Current();
   ThrowLocation throw_location;
   StackHandleScope<1> hs(self);
   Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException(&throw_location)));
+  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
   self->ClearException();
   // Default to handler not found.
   uint32_t found_dex_pc = DexFile::kDexNoIndex;
@@ -260,7 +263,7 @@
       // release its in use context at the end.
       delete self->GetLongJumpContext();
       LOG(WARNING) << "Unresolved exception class when finding catch block: "
-        << DescriptorToDot(mh.GetTypeDescriptorFromTypeIdx(iter_type_idx));
+        << DescriptorToDot(h_this->GetTypeDescriptorFromTypeIdx(iter_type_idx));
     } else if (iter_exception_type->IsAssignableFrom(exception_type.Get())) {
       found_dex_pc = it.GetHandlerAddress();
       break;
@@ -274,6 +277,7 @@
   // Put the exception back.
   if (exception.Get() != nullptr) {
     self->SetException(throw_location, exception.Get());
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   return found_dex_pc;
 }
@@ -283,7 +287,7 @@
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
-    CHECK_STREQ(MethodHelper(this).GetShorty(), shorty);
+    CHECK_STREQ(GetShorty(), shorty);
   }
 
   // Push a transition back into managed code onto the linked list in thread.
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index f901512..1c21b81 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -88,6 +88,11 @@
     return (GetAccessFlags() & kAccConstructor) != 0;
   }
 
+  // Returns true if the method is a class initializer.
+  bool IsClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsConstructor() && IsStatic();
+  }
+
   // Returns true if the method is static, private, or a constructor.
   bool IsDirect() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsDirect(GetAccessFlags());
@@ -216,14 +221,14 @@
   // Find the method that this method overrides
   ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
-              const char* shorty) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, const char* shorty)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   EntryPointFromInterpreter* GetEntryPointFromInterpreter()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldPtr<EntryPointFromInterpreter*, kVerifyFlags>(
-               OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_));
+        OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_));
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -281,7 +286,7 @@
      *
      * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
      */
-    return (code <= pc && pc <= code + GetCodeSize());
+    return code <= pc && pc <= code + GetCodeSize();
   }
 
   void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -398,8 +403,8 @@
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
-  uint32_t FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
-                          bool* has_no_move_exception)
+  static uint32_t FindCatchBlock(Handle<ArtMethod> h_this, Handle<Class> exception_type,
+                                 uint32_t dex_pc, bool* has_no_move_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
@@ -414,6 +419,30 @@
   static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t unused_length;
+    return GetShorty(&unused_length);
+  }
+  const char* GetShorty(uint32_t* out_length) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const Signature GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::CodeItem* GetCodeItem() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsResolvedTypeIdx(uint16_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int32_t GetLineNumFromDexPC(uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::ProtoId& GetPrototype() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::TypeList* GetParameterTypeList() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetDeclaringClassSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t GetClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::ClassDef& GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetReturnTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::ClassLoader* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetInterfaceMethodIfProxy() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 42ef68a..a20f7b9 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -84,7 +84,7 @@
     Handle<mirror::Object> old_throw_this_object(hs.NewHandle(old_throw_location.GetThis()));
     Handle<mirror::ArtMethod> old_throw_method(hs.NewHandle(old_throw_location.GetMethod()));
     uint32_t old_throw_dex_pc = old_throw_location.GetDexPc();
-
+    bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
     // clear exception to call FindSystemClass
     self->ClearException();
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -102,8 +102,8 @@
     // Restore exception.
     ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
-
     self->SetException(gc_safe_throw_location, old_exception.Get());
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   COMPILE_ASSERT(sizeof(Status) == sizeof(uint32_t), size_of_status_not_uint32);
   if (Runtime::Current()->IsActiveTransaction()) {
@@ -366,11 +366,9 @@
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) {
-  MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
-    mh.ChangeMethod(method);
-    if (name == mh.GetName() && mh.GetSignature() == signature) {
+    if (name == method->GetName() && method->GetSignature() == signature) {
       return method;
     }
   }
@@ -378,11 +376,9 @@
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) {
-  MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
-    mh.ChangeMethod(method);
-    if (name == mh.GetName() && signature == mh.GetSignature()) {
+    if (name == method->GetName() && signature == method->GetSignature()) {
       return method;
     }
   }
@@ -432,24 +428,19 @@
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) {
-  MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
-    mh.ChangeMethod(method);
-    if (name == mh.GetName() && mh.GetSignature() == signature) {
+    if (name == method->GetName() && method->GetSignature() == signature) {
       return method;
     }
   }
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
-                                            const Signature& signature) {
-  MethodHelper mh;
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) {
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
-    mh.ChangeMethod(method);
-    if (name == mh.GetName() && signature == mh.GetSignature()) {
+    if (name == method->GetName() && signature == method->GetSignature()) {
       return method;
     }
   }
@@ -501,13 +492,9 @@
 ArtMethod* Class::FindClassInitializer() {
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
-    if (method->IsConstructor() && method->IsStatic()) {
-      if (kIsDebugBuild) {
-        MethodHelper mh(method);
-        CHECK(mh.IsClassInitializer());
-        CHECK_STREQ(mh.GetName(), "<clinit>");
-        CHECK_STREQ(mh.GetSignature().ToString().c_str(), "()V");
-      }
+    if (method->IsClassInitializer()) {
+      DCHECK_STREQ(method->GetName(), "<clinit>");
+      DCHECK_STREQ(method->GetSignature().ToString().c_str(), "()V");
       return method;
     }
   }
@@ -517,11 +504,9 @@
 ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class?
   // Interfaces are not relevant because they can't contain instance fields.
-  FieldHelper fh;
   for (size_t i = 0; i < NumInstanceFields(); ++i) {
     ArtField* f = GetInstanceField(i);
-    fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
@@ -566,11 +551,9 @@
 
 ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) {
   DCHECK(type != NULL);
-  FieldHelper fh;
   for (size_t i = 0; i < NumStaticFields(); ++i) {
     ArtField* f = GetStaticField(i);
-    fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 62ab2c1..567ce3e 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -405,11 +405,9 @@
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr);
   if (UNLIKELY(kIsVolatile)) {
-    int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr)));
-    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
-    return result;
+    return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadSequentiallyConsistent();
   } else {
-    return *word_addr;
+    return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadJavaData();
   }
 }
 
@@ -435,11 +433,9 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
-    *word_addr = new_value;
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
+    reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreSequentiallyConsistent(new_value);
   } else {
-    *word_addr = new_value;
+    reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreJavaData(new_value);
   }
 }
 
@@ -461,6 +457,7 @@
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
+
   return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
@@ -472,11 +469,9 @@
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
   if (kIsVolatile) {
-    int64_t result = QuasiAtomic::Read64(addr);
-    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
-    return result;
+    return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadSequentiallyConsistent();
   } else {
-    return *addr;
+    return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadJavaData();
   }
 }
 
@@ -502,15 +497,9 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
-    QuasiAtomic::Write64(addr, new_value);
-    if (!QuasiAtomic::LongAtomicsUseMutexes()) {
-      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
-    } else {
-      // Fence from from mutex is enough.
-    }
+    reinterpret_cast<Atomic<int64_t>*>(addr)->StoreSequentiallyConsistent(new_value);
   } else {
-    *addr = new_value;
+    reinterpret_cast<Atomic<int64_t>*>(addr)->StoreJavaData(new_value);
   }
 }
 
@@ -546,7 +535,8 @@
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
   T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
+    // TODO: Refactor to use a SequentiallyConsistent load instead.
+    QuasiAtomic::ThreadFenceAcquire();  // Ensure visibility of operations preceding the publishing store.
   }
   if (kVerifyFlags & kVerifyReads) {
     VerifyObject(result);
@@ -584,9 +574,11 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
+    // TODO: Refactor to use a SequentiallyConsistent store instead.
+    QuasiAtomic::ThreadFenceRelease();  // Ensure that prior accesses are visible before store.
     objref_addr->Assign(new_value);
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+    // Ensure this store occurs before any volatile loads.
+    QuasiAtomic::ThreadFenceSequentiallyConsistent();
   } else {
     objref_addr->Assign(new_value);
   }
@@ -598,8 +590,9 @@
   SetFieldObjectWithoutWriteBarrier<kTransactionActive, kCheckTransaction, kVerifyFlags,
       kIsVolatile>(field_offset, new_value);
   if (new_value != nullptr) {
-    CheckFieldAssignment(field_offset, new_value);
     Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+    // TODO: CheckFieldAssignment could theoretically cause thread suspension; fix this.
+    CheckFieldAssignment(field_offset, new_value);
   }
 }
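
The object-inl.h changes above replace hand-placed QuasiAtomic membars with Atomic<T> loads and stores: volatile Java field accesses become sequentially consistent operations, while plain accesses become relaxed "Java data" operations. In standard C++ the same shape looks roughly like this (a sketch with std::atomic, not ART's Atomic<T>):

    #include <atomic>
    #include <cstdint>

    // A volatile Java int field modeled with a seq_cst C++ atomic; plain
    // accesses use relaxed ordering, like ART's LoadJavaData/StoreJavaData.
    struct Field32 {
      std::atomic<int32_t> word{0};

      int32_t LoadVolatile() const { return word.load(std::memory_order_seq_cst); }
      void StoreVolatile(int32_t v) { word.store(v, std::memory_order_seq_cst); }
      int32_t LoadPlain() const { return word.load(std::memory_order_relaxed); }
      void StorePlain(int32_t v) { word.store(v, std::memory_order_relaxed); }
    };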
 
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 69e5a84..422a88b 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -204,7 +204,8 @@
       for (size_t i = 0; i < num_ref_ifields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          FieldHelper fh(field);
+          StackHandleScope<1> hs(Thread::Current());
+          FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
           return;
         }
@@ -222,7 +223,8 @@
       for (size_t i = 0; i < num_ref_sfields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          FieldHelper fh(field);
+          StackHandleScope<1> hs(Thread::Current());
+          FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
           return;
         }
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 442909d..c082443 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -103,6 +103,13 @@
   // avoids the barriers.
   LockWord GetLockWord(bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetLockWord(LockWord new_val, bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // All Cas operations defined here have C++11 memory_order_seq_cst ordering
+  // semantics: Preceding memory operations become visible to other threads
+  // before the CAS, and subsequent operations become visible after the CAS.
+  // The Cas operations defined here do not fail spuriously, i.e. they
+  // have C++11 "strong" semantics.
+  // TODO: In most, possibly all, cases, these assumptions are too strong.
+  // Confirm and weaken the implementation.
   bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
@@ -316,6 +323,7 @@
 
  private:
   // Verify the type correctness of stores to fields.
+  // TODO: This can cause thread suspension and isn't moving GC safe.
   void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void CheckFieldAssignment(MemberOffset field_offset, Object* new_value)
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 9b6e901..c7540dc 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -19,6 +19,7 @@
 
 #include "object_array.h"
 
+#include "base/stringprintf.h"
 #include "gc/heap.h"
 #include "mirror/art_field.h"
 #include "mirror/class.h"
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 18e50ce..f85fb27 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -451,7 +451,7 @@
 
   jobject jclass_loader_1 = LoadDex("ProtoCompare");
   jobject jclass_loader_2 = LoadDex("ProtoCompare2");
-  StackHandleScope<2> hs(soa.Self());
+  StackHandleScope<4> hs(soa.Self());
   Handle<ClassLoader> class_loader_1(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader_1)));
   Handle<ClassLoader> class_loader_2(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader_2)));
 
@@ -461,33 +461,25 @@
   ASSERT_TRUE(klass2 != NULL);
 
   ArtMethod* m1_1 = klass1->GetVirtualMethod(0);
-  MethodHelper mh(m1_1);
-  EXPECT_STREQ(mh.GetName(), "m1");
+  EXPECT_STREQ(m1_1->GetName(), "m1");
   ArtMethod* m2_1 = klass1->GetVirtualMethod(1);
-  mh.ChangeMethod(m2_1);
-  EXPECT_STREQ(mh.GetName(), "m2");
+  EXPECT_STREQ(m2_1->GetName(), "m2");
   ArtMethod* m3_1 = klass1->GetVirtualMethod(2);
-  mh.ChangeMethod(m3_1);
-  EXPECT_STREQ(mh.GetName(), "m3");
+  EXPECT_STREQ(m3_1->GetName(), "m3");
   ArtMethod* m4_1 = klass1->GetVirtualMethod(3);
-  mh.ChangeMethod(m4_1);
-  EXPECT_STREQ(mh.GetName(), "m4");
+  EXPECT_STREQ(m4_1->GetName(), "m4");
 
   ArtMethod* m1_2 = klass2->GetVirtualMethod(0);
-  mh.ChangeMethod(m1_2);
-  EXPECT_STREQ(mh.GetName(), "m1");
+  EXPECT_STREQ(m1_2->GetName(), "m1");
   ArtMethod* m2_2 = klass2->GetVirtualMethod(1);
-  mh.ChangeMethod(m2_2);
-  EXPECT_STREQ(mh.GetName(), "m2");
+  EXPECT_STREQ(m2_2->GetName(), "m2");
   ArtMethod* m3_2 = klass2->GetVirtualMethod(2);
-  mh.ChangeMethod(m3_2);
-  EXPECT_STREQ(mh.GetName(), "m3");
+  EXPECT_STREQ(m3_2->GetName(), "m3");
   ArtMethod* m4_2 = klass2->GetVirtualMethod(3);
-  mh.ChangeMethod(m4_2);
-  EXPECT_STREQ(mh.GetName(), "m4");
+  EXPECT_STREQ(m4_2->GetName(), "m4");
 
-  mh.ChangeMethod(m1_1);
-  MethodHelper mh2(m1_2);
+  MethodHelper mh(hs.NewHandle(m1_1));
+  MethodHelper mh2(hs.NewHandle(m1_2));
   EXPECT_TRUE(mh.HasSameNameAndSignature(&mh2));
   EXPECT_TRUE(mh2.HasSameNameAndSignature(&mh));
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 1d79106..5c57dce 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -16,6 +16,7 @@
 
 #include "string-inl.h"
 
+#include "arch/memcmp16.h"
 #include "array.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -206,21 +207,6 @@
   return result;
 }
 
-#ifdef HAVE__MEMCMP16
-// "count" is in 16-bit units.
-extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
-#define MemCmp16 __memcmp16
-#else
-static uint32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
-  for (size_t i = 0; i < count; i++) {
-    if (s0[i] != s1[i]) {
-      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
-    }
-  }
-  return 0;
-}
-#endif
-
 int32_t String::CompareTo(String* rhs) {
   // Quick test for comparison of a string with itself.
   String* lhs = this;
@@ -233,13 +219,13 @@
   // *without* sign extension before it subtracts them (which makes some
   // sense since "char" is unsigned).  So what we get is the result of
   // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
-  int lhsCount = lhs->GetLength();
-  int rhsCount = rhs->GetLength();
-  int countDiff = lhsCount - rhsCount;
-  int minCount = (countDiff < 0) ? lhsCount : rhsCount;
+  int32_t lhsCount = lhs->GetLength();
+  int32_t rhsCount = rhs->GetLength();
+  int32_t countDiff = lhsCount - rhsCount;
+  int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
   const uint16_t* lhsChars = lhs->GetCharArray()->GetData() + lhs->GetOffset();
   const uint16_t* rhsChars = rhs->GetCharArray()->GetData() + rhs->GetOffset();
-  int otherRes = MemCmp16(lhsChars, rhsChars, minCount);
+  int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
   if (otherRes != 0) {
     return otherRes;
   }
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index 6874fe5..6efc9e2 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -85,16 +85,14 @@
     ObjectArray<Object>* method_trace = down_cast<ObjectArray<Object>*>(stack_state);
     int32_t depth = method_trace->GetLength() - 1;
     IntArray* pc_trace = down_cast<IntArray*>(method_trace->Get(depth));
-    MethodHelper mh;
     if (depth == 0) {
       result += "(Throwable with empty stack trace)";
     } else {
       for (int32_t i = 0; i < depth; ++i) {
-        ArtMethod* method = down_cast<ArtMethod*>(method_trace->Get(i));
-        mh.ChangeMethod(method);
+        mirror::ArtMethod* method = down_cast<ArtMethod*>(method_trace->Get(i));
         uint32_t dex_pc = pc_trace->Get(i);
-        int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
-        const char* source_file = mh.GetDeclaringClassSourceFile();
+        int32_t line_number = method->GetLineNumFromDexPC(dex_pc);
+        const char* source_file = method->GetDeclaringClassSourceFile();
         result += StringPrintf("  at %s (%s:%d)\n", PrettyMethod(method, true).c_str(),
                                source_file, line_number);
       }
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 58e6dd4..a19445b 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -694,7 +694,7 @@
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
         if (h_obj->CasLockWord(lock_word, thin_locked)) {
-          QuasiAtomic::MembarLoadLoad();
+          // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
         }
         continue;  // Go again.
@@ -967,14 +967,13 @@
   }
 
   // <clinit> is another special case. The runtime holds the class lock while calling <clinit>.
-  MethodHelper mh(m);
-  if (mh.IsClassInitializer()) {
+  if (m->IsClassInitializer()) {
     callback(m->GetDeclaringClass(), callback_context);
     // Fall through because there might be synchronization in the user code too.
   }
 
   // Is there any reason to believe there's any synchronization in this method?
-  const DexFile::CodeItem* code_item = mh.GetCodeItem();
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
   CHECK(code_item != NULL) << PrettyMethod(m);
   if (code_item->tries_size_ == 0) {
     return;  // No "tries" implies no synchronization, so no held locks to report.
@@ -1048,12 +1047,11 @@
     *line_number = 0;
     return;
   }
-  MethodHelper mh(method);
-  *source_file = mh.GetDeclaringClassSourceFile();
+  *source_file = method->GetDeclaringClassSourceFile();
   if (*source_file == NULL) {
     *source_file = "";
   }
-  *line_number = mh.GetLineNumFromDexPC(dex_pc);
+  *line_number = method->GetLineNumFromDexPC(dex_pc);
 }
 
 uint32_t Monitor::GetOwnerThreadId() {
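
The monitor.cc change drops the MembarLoadLoad after a successful CasLockWord because, per the new Object comment above, the CAS is already sequentially consistent and therefore provides at least the acquire ordering a lock needs. A toy thin-lock acquire showing why no extra fence is needed on the success path (a sketch, not ART's lock-word encoding):

    #include <atomic>
    #include <cstdint>

    // A single strong seq_cst CAS both installs the owner and supplies the
    // acquire ordering, so no separate fence is needed when it succeeds.
    bool ThinLockAcquire(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      uint32_t expected = 0;  // 0 == unlocked in this sketch.
      return lock_word.compare_exchange_strong(expected, thread_id,
                                               std::memory_order_seq_cst);
    }
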
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 2c24e33..9512a5a 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -17,7 +17,11 @@
 #include <algorithm>
 #include <set>
 #include <fcntl.h>
+#ifdef __linux__
 #include <sys/sendfile.h>
+#else
+#include <sys/socket.h>
+#endif
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -241,7 +245,12 @@
     return;
   }
 
+#ifdef __linux__
   if (sendfile(dst.get(), src.get(), nullptr, stat_src.st_size) == -1) {
+#else
+  off_t len = 0;  // In/out byte count on BSD/macOS; 0 requests copying until end of file.
+  if (sendfile(dst.get(), src.get(), 0, &len, nullptr, 0) == -1) {
+#endif
     PLOG(ERROR) << "Failed to copy profile file " << oldfile << " to " << newfile
       << ". My uid:gid is " << getuid() << ":" << getgid();
   }
@@ -331,16 +340,7 @@
   if (Runtime::Current()->GetProfilerOptions().IsEnabled() && (pkgname != nullptr)) {
     const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */)
         + std::string("/") + pkgname;
-    const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache",
-                                                              false /* create_if_absent */);
-
-    // Make the profile cache if it doesn't exist.
-    mkdir(profile_cache_dir.c_str(), 0700);
-
-    // The previous profile file (a copy of the profile the last time this was run) is
-    // in the dalvik-cache directory because this is owned by system.  The profiles
-    // directory is owned by install so system cannot write files in there.
-    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname;
+    const std::string prev_profile_file = profile_file + std::string("@old");
 
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
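
The #ifdef above exists because sendfile differs across platforms: Linux takes (out_fd, in_fd, offset, count), while the BSD/macOS variant takes (fd, s, offset, len, hdtr, flags), where fd is the source file, s is the destination, and len is an in/out byte count; per the man pages the BSD destination must normally be a socket, so the non-Linux branch is best-effort. A hedged sketch of a portable wrapper (a hypothetical helper, not part of this patch):

    #include <sys/types.h>
    #ifdef __linux__
    #include <sys/sendfile.h>
    #else
    #include <sys/socket.h>
    #include <sys/uio.h>
    #endif

    // Hypothetical portability shim; note the two APIs disagree on which
    // descriptor comes first and on how the byte count is passed.
    int CopyBytes(int dst_fd, int src_fd, off_t count) {
    #ifdef __linux__
      return sendfile(dst_fd, src_fd, nullptr, count) == -1 ? -1 : 0;
    #else
      off_t len = count;  // In/out parameter; 0 would mean "until EOF".
      return sendfile(src_fd, dst_fd, /*offset=*/0, &len, nullptr, 0);
    #endif
    }
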
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 7490e6a..820bd04 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -32,9 +32,11 @@
 static void EnableDebugger() {
   // To let a non-privileged gdbserver attach to this
   // process, we must set our dumpable flag.
+#if defined(HAVE_PRCTL)
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+#endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
   rl.rlim_cur = 0;
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 0d54772..3564dfd 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -90,12 +90,13 @@
 }
 
 static bool CheckReceiver(const ScopedFastNativeObjectAccess& soa, jobject j_rcvr,
-                          mirror::ArtField* f, mirror::Object** class_or_rcvr)
+                          mirror::ArtField** f, mirror::Object** class_or_rcvr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   soa.Self()->AssertThreadSuspensionIsAllowable();
-  if (f->IsStatic()) {
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::Class> h_klass(hs.NewHandle(f->GetDeclaringClass()));
+  if ((*f)->IsStatic()) {
+    StackHandleScope<2> hs(soa.Self());
+    HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f));
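+    // EnsureInitialized below may suspend the thread; the HandleWrapper writes any relocated
+    // ArtField pointer back through f so the caller observes the update.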
+    Handle<mirror::Class> h_klass(hs.NewHandle((*f)->GetDeclaringClass()));
     if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true))) {
       DCHECK(soa.Self()->IsExceptionPending());
       *class_or_rcvr = nullptr;
@@ -106,7 +107,7 @@
   }
 
   *class_or_rcvr = soa.Decode<mirror::Object*>(j_rcvr);
-  mirror::Class* declaringClass = f->GetDeclaringClass();
+  mirror::Class* declaringClass = (*f)->GetDeclaringClass();
   if (!VerifyObjectIsClass(*class_or_rcvr, declaringClass)) {
     DCHECK(soa.Self()->IsExceptionPending());
     *class_or_rcvr = nullptr;
@@ -117,10 +118,9 @@
 
 static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
@@ -131,7 +131,7 @@
   }
   // We now don't expect suspension unless an exception is thrown.
   // Get the field's value, boxing if necessary.
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   JValue value;
   if (!GetFieldValue(soa, o, f, field_type, true, &value)) {
     DCHECK(soa.Self()->IsExceptionPending());
@@ -143,10 +143,9 @@
 static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj,
                                 char dst_descriptor, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
@@ -159,7 +158,7 @@
 
   // We now don't expect suspension unless an exception is thrown.
   // Read the value.
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   JValue field_value;
   if (!GetFieldValue(soa, o, f, field_type, false, &field_value)) {
     DCHECK(soa.Self()->IsExceptionPending());
@@ -257,33 +256,29 @@
 static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue,
                       jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   // Check that the receiver is non-null and an instance of the field's declaring class.
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return;
   }
-  Primitive::Type field_prim_type;
   mirror::Class* field_type;
-  {
-    FieldHelper fh(f);
-    const char* field_type_desciptor = fh.GetTypeDescriptor();
-    field_prim_type = Primitive::GetType(field_type_desciptor[0]);
-    if (field_prim_type == Primitive::kPrimNot) {
-      StackHandleScope<1> hs(soa.Self());
-      HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&o));
-      // May cause resolution.
-      CHECK(!kMovingFields) << "Resolution may trigger thread suspension";
-      field_type = fh.GetType(true);
-      if (field_type == nullptr) {
-        DCHECK(soa.Self()->IsExceptionPending());
-        return;
-      }
-    } else {
-      field_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(field_type_desciptor[0]);
+  const char* field_type_descriptor = f->GetTypeDescriptor();
+  Primitive::Type field_prim_type = Primitive::GetType(field_type_descriptor[0]);
+  if (field_prim_type == Primitive::kPrimNot) {
+    StackHandleScope<2> hs(soa.Self());
+    HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
+    HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+    FieldHelper fh(h_f);
+    // May cause resolution.
+    field_type = fh.GetType(true);
+    if (field_type == nullptr) {
+      DCHECK(soa.Self()->IsExceptionPending());
+      return;
     }
+  } else {
+    field_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(field_type_descriptor[0]);
   }
   // We now don't expect suspension unless an exception is thrown.
   // Unbox the value, if necessary.
@@ -306,10 +301,10 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     return;
   }
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   if (UNLIKELY(field_type == Primitive::kPrimNot)) {
     ThrowIllegalArgumentException(nullptr, StringPrintf("Not a primitive field: %s",
                                                         PrettyField(f).c_str()).c_str());
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 764db5e..d23cfff 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -83,7 +83,7 @@
                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
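+  // Release fence: all prior writes must be visible before the field store below.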
   // JNI must use non transactional mode.
   obj->SetField32<false>(MemberOffset(offset), newValue);
 }
@@ -119,7 +119,7 @@
                                   jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField64<false>(MemberOffset(offset), newValue);
 }
@@ -161,7 +161,7 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetFieldObject<false>(MemberOffset(offset), newValue);
 }
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 96834b8..f4721f2 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '3', '\0' };
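+// The oat version is bumped whenever the oat file format changes; files with a stale version
+// are treated as out of date and recompiled.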
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '5', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 74dfe91..6c44aa9 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -292,11 +292,14 @@
       return false;
     }
 
-    oat_dex_files_.Put(dex_file_location, new OatDexFile(this,
-                                                         dex_file_location,
-                                                         dex_file_checksum,
-                                                         dex_file_pointer,
-                                                         methods_offsets_pointer));
+    OatDexFile* oat_dex_file = new OatDexFile(this,
+                                              dex_file_location,
+                                              dex_file_checksum,
+                                              dex_file_pointer,
+                                              methods_offsets_pointer);
+    // Use a StringPiece backed by the oat_dex_file's internal std::string as the key.
+    StringPiece key(oat_dex_file->GetDexFileLocation());
+    oat_dex_files_.Put(key, oat_dex_file);
   }
   return true;
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index d703731..eae0418 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "base/stringpiece.h"
 #include "dex_file.h"
 #include "invoke_type.h"
 #include "mem_map.h"
@@ -206,11 +207,11 @@
                const byte* dex_file_pointer,
                const uint32_t* oat_class_offsets_pointer);
 
-    const OatFile* oat_file_;
-    std::string dex_file_location_;
-    uint32_t dex_file_location_checksum_;
-    const byte* dex_file_pointer_;
-    const uint32_t* oat_class_offsets_pointer_;
+    const OatFile* const oat_file_;
+    const std::string dex_file_location_;
+    const uint32_t dex_file_location_checksum_;
+    const byte* const dex_file_pointer_;
+    const uint32_t* const oat_class_offsets_pointer_;
 
     friend class OatFile;
     DISALLOW_COPY_AND_ASSIGN(OatDexFile);
@@ -270,7 +271,9 @@
   // dlopen handle during runtime.
   void* dlopen_handle_;
 
-  typedef SafeMap<std::string, const OatDexFile*> Table;
+  // NOTE: We use a StringPiece as the key type to avoid a memory allocation on every lookup
+  // with a const char* key.
+  typedef SafeMap<StringPiece, const OatDexFile*> Table;
   Table oat_dex_files_;
 
   friend class OatClass;
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 664ac89..28ce8f3 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -68,68 +68,33 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : field_(nullptr) {}
-  explicit FieldHelper(mirror::ArtField* f) : field_(f) {}
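+  // FieldHelper now takes a Handle so that the wrapped ArtField stays valid across thread
+  // suspension points such as GetType(), which may resolve classes.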
+  explicit FieldHelper(Handle<mirror::ArtField> f) : field_(f) {}
 
   void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_f != nullptr);
-    field_ = new_f;
+    field_.Assign(new_f);
   }
 
-  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      DCHECK(field_->IsStatic());
-      DCHECK_LT(field_index, 2U);
-      return field_index == 0 ? "interfaces" : "throws";
-    }
-    const DexFile& dex_file = GetDexFile();
-    return dex_file.GetFieldName(dex_file.GetFieldId(field_index));
+  mirror::ArtField* GetField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return field_.Get();
   }
 
   mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
     if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      return GetClassLinker()->FindSystemClass(Thread::Current(), GetTypeDescriptor());
+      return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
+                                                                   field_->GetTypeDescriptor());
     }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
+    const DexFile* dex_file = field_->GetDexFile();
+    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+    mirror::Class* type = field_->GetDexCache()->GetResolvedType(field_id.type_idx_);
     if (resolve && (type == nullptr)) {
-      type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
+      type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, field_.Get());
       CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
 
-  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      DCHECK(field_->IsStatic());
-      DCHECK_LT(field_index, 2U);
-      // 0 == Class[] interfaces; 1 == Class[][] throws;
-      return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
-    }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    return dex_file.GetFieldTypeDescriptor(field_id);
-  }
-
-  Primitive::Type GetTypeAsPrimitiveType()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return Primitive::GetType(GetTypeDescriptor()[0]);
-  }
-
-  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetTypeAsPrimitiveType();
-    return type != Primitive::kPrimNot;
-  }
-
-  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetTypeAsPrimitiveType();
-    return Primitive::FieldSize(type);
-  }
-
   // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
   // If you need it longer, copy it into a std::string.
   const char* GetDeclaringClassDescriptor()
@@ -142,22 +107,13 @@
       declaring_class_descriptor_ = field_->GetDeclaringClass()->GetDescriptor();
       return declaring_class_descriptor_.c_str();
     }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    return dex_file.GetFieldDeclaringClassDescriptor(field_id);
+    const DexFile* dex_file = field_->GetDexFile();
+    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+    return dex_file->GetFieldDeclaringClassDescriptor(field_id);
   }
 
  private:
-  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return field_->GetDeclaringClass()->GetDexCache();
-  }
-  ClassLinker* GetClassLinker() ALWAYS_INLINE {
-    return Runtime::Current()->GetClassLinker();
-  }
-  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return *GetDexCache()->GetDexFile();
-  }
-  mirror::ArtField* field_;
+  Handle<mirror::ArtField> field_;
   std::string declaring_class_descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(FieldHelper);
@@ -165,12 +121,9 @@
 
 class MethodHelper {
  public:
-  MethodHelper() : method_(nullptr), shorty_(nullptr), shorty_len_(0) {}
-
-  explicit MethodHelper(mirror::ArtMethod* m)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : method_(nullptr), shorty_(nullptr), shorty_len_(0) {
-    SetMethod(m);
+  explicit MethodHelper(Handle<mirror::ArtMethod> m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : method_(m), shorty_(nullptr), shorty_len_(0) {
+    SetMethod(m.Get());
   }
 
   void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -179,48 +132,25 @@
     shorty_ = nullptr;
   }
 
-  mirror::ArtMethod* GetMethod() const {
-    return method_;
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
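+    // For proxy methods, return the interface method being implemented.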
+    return method_->GetInterfaceMethodIfProxy();
   }
 
-  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    if (LIKELY(dex_method_idx != DexFile::kDexNoIndex)) {
-      return dex_file.GetMethodName(dex_file.GetMethodId(dex_method_idx));
-    } else {
-      Runtime* runtime = Runtime::Current();
-      if (method_ == runtime->GetResolutionMethod()) {
-        return "<runtime internal resolution method>";
-      } else if (method_ == runtime->GetImtConflictMethod()) {
-        return "<runtime internal imt conflict method>";
-      } else if (method_ == runtime->GetCalleeSaveMethod(Runtime::kSaveAll)) {
-        return "<runtime internal callee-save all registers method>";
-      } else if (method_ == runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)) {
-        return "<runtime internal callee-save reference registers method>";
-      } else if (method_ == runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs)) {
-        return "<runtime internal callee-save reference and argument registers method>";
-      } else {
-        return "<unknown runtime internal method>";
-      }
-    }
-  }
-
-  mirror::String* GetNameAsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
-    StackHandleScope<1> hs(Thread::Current());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache()));
-    return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, dex_cache);
+  mirror::String* GetNameAsString(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile* dex_file = method_->GetDexFile();
+    mirror::ArtMethod* method = method_->GetInterfaceMethodIfProxy();
+    uint32_t dex_method_idx = method->GetDexMethodIndex();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+    return Runtime::Current()->GetClassLinker()->ResolveString(*dex_file, method_id.name_idx_,
+                                                               dex_cache);
   }
 
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const char* result = shorty_;
     if (result == nullptr) {
-      const DexFile& dex_file = GetDexFile();
-      result = dex_file.GetMethodShorty(dex_file.GetMethodId(method_->GetDexMethodIndex()),
-                                        &shorty_len_);
+      result = method_->GetShorty(&shorty_len_);
       shorty_ = result;
     }
     return result;
@@ -247,94 +177,27 @@
     return refs;
   }
 
-  const Signature GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    if (dex_method_idx != DexFile::kDexNoIndex) {
-      return dex_file.GetMethodSignature(dex_file.GetMethodId(dex_method_idx));
-    } else {
-      return Signature::NoSignature();
-    }
-  }
-
-  const DexFile::ProtoId& GetPrototype() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    return dex_file.GetMethodPrototype(dex_file.GetMethodId(method_->GetDexMethodIndex()));
-  }
-
-  const DexFile::TypeList* GetParameterTypeList() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile::ProtoId& proto = GetPrototype();
-    return GetDexFile().GetProtoParameters(proto);
-  }
-
+  // May cause thread suspension because GetClassFromTypeIdx calls ResolveType; this has caused
+  // a large number of bugs at call sites.
   mirror::Class* GetReturnType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::MethodId& method_id = dex_file.GetMethodId(method_->GetDexMethodIndex());
-    const DexFile::ProtoId& proto_id = dex_file.GetMethodPrototype(method_id);
+    mirror::ArtMethod* method = GetMethod();
+    const DexFile* dex_file = method->GetDexFile();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(method->GetDexMethodIndex());
+    const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
     uint16_t return_type_idx = proto_id.return_type_idx_;
     return GetClassFromTypeIdx(return_type_idx, resolve);
   }
 
-  const char* GetReturnTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::MethodId& method_id = dex_file.GetMethodId(method_->GetDexMethodIndex());
-    const DexFile::ProtoId& proto_id = dex_file.GetMethodPrototype(method_id);
-    uint16_t return_type_idx = proto_id.return_type_idx_;
-    return dex_file.GetTypeDescriptor(dex_file.GetTypeId(return_type_idx));
-  }
-
-  int32_t GetLineNumFromDexPC(uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (dex_pc == DexFile::kDexNoIndex) {
-      return method_->IsNative() ? -2 : -1;
-    } else {
-      const DexFile& dex_file = GetDexFile();
-      return dex_file.GetLineNumFromPC(method_, dex_pc);
-    }
-  }
-
-  const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    uint32_t dex_method_idx = method_->GetDexMethodIndex();
-    if (UNLIKELY(dex_method_idx == DexFile::kDexNoIndex)) {
-      return "<runtime method>";
-    }
-    return dex_file.GetMethodDeclaringClassDescriptor(dex_file.GetMethodId(dex_method_idx));
-  }
-
-  const char* GetDeclaringClassSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDeclaringClass()->GetSourceFile();
-  }
-
-  uint16_t GetClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDeclaringClass()->GetDexClassDefIndex();
-  }
-
-  const DexFile::ClassDef& GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetDexFile().GetClassDef(GetClassDefIndex());
-  }
-
-  mirror::ClassLoader* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDeclaringClass()->GetClassLoader();
-  }
-
-  bool IsStatic() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->IsStatic();
-  }
-
-  bool IsClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->IsConstructor() && IsStatic();
-  }
-
   size_t NumArgs() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // "1 +" because the first in Args is the receiver.
     // "- 1" because we don't count the return type.
-    return (IsStatic() ? 0 : 1) + GetShortyLength() - 1;
+    return (method_->IsStatic() ? 0 : 1) + GetShortyLength() - 1;
   }
 
   // Get the primitive type associated with the given parameter.
   Primitive::Type GetParamPrimitiveType(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK_LT(param, NumArgs());
-    if (IsStatic()) {
+    if (GetMethod()->IsStatic()) {
       param++;  // 0th argument must skip return value at start of the shorty
     } else if (param == 0) {
       return Primitive::kPrimNot;
@@ -354,21 +217,20 @@
   }
 
   bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
-    if (GetDexCache() == other->GetDexCache()) {
+    const DexFile* dex_file = method_->GetDexFile();
+    const DexFile::MethodId& mid = dex_file->GetMethodId(GetMethod()->GetDexMethodIndex());
+    if (method_->GetDexCache() == other->method_->GetDexCache()) {
       const DexFile::MethodId& other_mid =
-          dex_file.GetMethodId(other->method_->GetDexMethodIndex());
+          dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
       return mid.name_idx_ == other_mid.name_idx_ && mid.proto_idx_ == other_mid.proto_idx_;
     }
-    const DexFile& other_dex_file = other->GetDexFile();
+    const DexFile* other_dex_file = other->method_->GetDexFile();
     const DexFile::MethodId& other_mid =
-        other_dex_file.GetMethodId(other->method_->GetDexMethodIndex());
-    if (!DexFileStringEquals(&dex_file, mid.name_idx_,
-                             &other_dex_file, other_mid.name_idx_)) {
+        other_dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
+    if (!DexFileStringEquals(dex_file, mid.name_idx_, other_dex_file, other_mid.name_idx_)) {
       return false;  // Name mismatch.
     }
-    return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
+    return dex_file->GetMethodSignature(mid) == other_dex_file->GetMethodSignature(other_mid);
   }
 
   bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
@@ -376,8 +238,8 @@
     if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
       return false;
     }
-    const DexFile::TypeList* types = GetParameterTypeList();
-    const DexFile::TypeList* other_types = other->GetParameterTypeList();
+    const DexFile::TypeList* types = method_->GetParameterTypeList();
+    const DexFile::TypeList* other_types = other->method_->GetParameterTypeList();
     if (types == nullptr) {
       return (other_types == nullptr) || (other_types->Size() == 0);
     } else if (UNLIKELY(other_types == nullptr)) {
@@ -398,84 +260,63 @@
     return true;
   }
 
-  const DexFile::CodeItem* GetCodeItem()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
-  }
-
-  bool IsResolvedTypeIdx(uint16_t type_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDexCacheResolvedTypes()->Get(type_idx) != nullptr;
-  }
-
   mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::Class* type = method_->GetDexCacheResolvedTypes()->Get(type_idx);
+    mirror::ArtMethod* method = GetMethod();
+    mirror::Class* type = method->GetDexCacheResolvedTypes()->Get(type_idx);
     if (type == nullptr && resolve) {
-      type = GetClassLinker()->ResolveType(type_idx, method_);
+      type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
       CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
 
-  const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dex_file = GetDexFile();
-    return dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx));
-  }
-
   mirror::Class* GetDexCacheResolvedType(uint16_t type_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDexCacheResolvedTypes()->Get(type_idx);
-  }
-
-  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return *GetDexCache()->GetDexFile();
-  }
-
-  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDeclaringClass()->GetDexCache();
+    return GetMethod()->GetDexCacheResolvedTypes()->Get(type_idx);
   }
 
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
+    mirror::ArtMethod* method = GetMethod();
+    mirror::String* s = method->GetDexCacheStrings()->Get(string_idx);
     if (UNLIKELY(s == nullptr)) {
       StackHandleScope<1> hs(Thread::Current());
-      Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache()));
-      s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+      s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx,
+                                                              dex_cache);
     }
     return s;
   }
 
   uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dexfile = GetDexFile();
-    if (&dexfile == &other_dexfile) {
-      return method_->GetDexMethodIndex();
+    mirror::ArtMethod* method = GetMethod();
+    const DexFile* dexfile = method->GetDexFile();
+    if (dexfile == &other_dexfile) {
+      return method->GetDexMethodIndex();
     }
-    const DexFile::MethodId& mid = dexfile.GetMethodId(method_->GetDexMethodIndex());
-    const char* mid_declaring_class_descriptor = dexfile.StringByTypeIdx(mid.class_idx_);
+    const DexFile::MethodId& mid = dexfile->GetMethodId(method->GetDexMethodIndex());
+    const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
     const DexFile::StringId* other_descriptor =
         other_dexfile.FindStringId(mid_declaring_class_descriptor);
     if (other_descriptor != nullptr) {
       const DexFile::TypeId* other_type_id =
           other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
       if (other_type_id != nullptr) {
-        const char* mid_name = dexfile.GetMethodName(mid);
+        const char* mid_name = dexfile->GetMethodName(mid);
         const DexFile::StringId* other_name = other_dexfile.FindStringId(mid_name);
         if (other_name != nullptr) {
           uint16_t other_return_type_idx;
           std::vector<uint16_t> other_param_type_idxs;
-          bool success = other_dexfile.CreateTypeList(dexfile.GetMethodSignature(mid).ToString(),
-                                                      &other_return_type_idx,
-                                                      &other_param_type_idxs);
+          bool success = other_dexfile.CreateTypeList(
+              dexfile->GetMethodSignature(mid).ToString(), &other_return_type_idx,
+              &other_param_type_idxs);
           if (success) {
             const DexFile::ProtoId* other_sig =
                 other_dexfile.FindProtoId(other_return_type_idx, other_param_type_idxs);
             if (other_sig != nullptr) {
-              const  DexFile::MethodId* other_mid = other_dexfile.FindMethodId(*other_type_id,
-                                                                               *other_name,
-                                                                               *other_sig);
+              const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+                  *other_type_id, *other_name, *other_sig);
               if (other_mid != nullptr) {
                 return other_dexfile.GetIndexForMethodId(*other_mid);
               }
@@ -492,15 +333,17 @@
   uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
                                             uint32_t name_and_signature_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile& dexfile = GetDexFile();
-    const DexFile::MethodId& mid = dexfile.GetMethodId(method_->GetDexMethodIndex());
+    mirror::ArtMethod* method = GetMethod();
+    const DexFile* dexfile = method->GetDexFile();
+    const uint32_t dex_method_idx = method->GetDexMethodIndex();
+    const DexFile::MethodId& mid = dexfile->GetMethodId(dex_method_idx);
     const DexFile::MethodId& name_and_sig_mid = other_dexfile.GetMethodId(name_and_signature_idx);
-    DCHECK_STREQ(dexfile.GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
-    DCHECK_EQ(dexfile.GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
-    if (&dexfile == &other_dexfile) {
-      return method_->GetDexMethodIndex();
+    DCHECK_STREQ(dexfile->GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
+    DCHECK_EQ(dexfile->GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
+    if (dexfile == &other_dexfile) {
+      return dex_method_idx;
     }
-    const char* mid_declaring_class_descriptor = dexfile.StringByTypeIdx(mid.class_idx_);
+    const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
     const DexFile::StringId* other_descriptor =
         other_dexfile.FindStringId(mid_declaring_class_descriptor);
     if (other_descriptor != nullptr) {
@@ -522,24 +365,10 @@
   // Set the method_ field, for proxy methods looking up the interface method via the resolved
   // methods table.
   void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (method != nullptr) {
-      mirror::Class* klass = method->GetDeclaringClass();
-      if (UNLIKELY(klass->IsProxyClass())) {
-        mirror::ArtMethod* interface_method =
-            method->GetDexCacheResolvedMethods()->Get(method->GetDexMethodIndex());
-        DCHECK(interface_method != nullptr);
-        DCHECK(interface_method == GetClassLinker()->FindMethodForProxy(klass, method));
-        method = interface_method;
-      }
-    }
-    method_ = method;
+    method_.Assign(method);
   }
 
-  ClassLinker* GetClassLinker() ALWAYS_INLINE {
-    return Runtime::Current()->GetClassLinker();
-  }
-
-  mirror::ArtMethod* method_;
+  Handle<mirror::ArtMethod> method_;
   const char* shorty_;
   uint32_t shorty_len_;
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 5d9a3ae..0820330 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -15,13 +15,16 @@
  */
 
 #include "parsed_options.h"
-#include "utils.h"
+
 #ifdef HAVE_ANDROID_OS
 #include "cutils/properties.h"
 #endif
 
+#include "base/stringpiece.h"
 #include "debugger.h"
+#include "gc/heap.h"
 #include "monitor.h"
+#include "utils.h"
 
 namespace art {
 
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index a27eec6..d0f3c12 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include "gc/collector_type.h"
 #include "runtime.h"
 #include "trace.h"
 
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index bad79b3..00bb501 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -378,8 +378,7 @@
 
   bool is_filtered = false;
 
-  MethodHelper mh(method);
-  if (strcmp(mh.GetName(), "<clinit>") == 0) {
+  if (strcmp(method->GetName(), "<clinit>") == 0) {
     // always filter out class init
     is_filtered = true;
   }
@@ -460,8 +459,7 @@
         mirror::ArtMethod *method = meth_iter.first;
         std::string method_name = PrettyMethod(method);
 
-        MethodHelper mh(method);
-        const DexFile::CodeItem* codeitem = mh.GetCodeItem();
+        const DexFile::CodeItem* codeitem = method->GetCodeItem();
         uint32_t method_size = 0;
         if (codeitem != nullptr) {
           method_size = codeitem->insns_size_in_code_units_;
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 9724bcc..093c129 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -140,52 +140,60 @@
 TEST_F(ProxyTest, ProxyFieldHelper) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
-  StackHandleScope<1> hs(soa.Self());
+  StackHandleScope<9> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(
       hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
 
-  mirror::Class* I = class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader);
-  mirror::Class* J = class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader);
-  ASSERT_TRUE(I != nullptr);
-  ASSERT_TRUE(J != nullptr);
-  std::vector<mirror::Class*> interfaces;
-  interfaces.push_back(I);
-  interfaces.push_back(J);
+  Handle<mirror::Class> I(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader)));
+  Handle<mirror::Class> J(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader)));
+  ASSERT_TRUE(I.Get() != nullptr);
+  ASSERT_TRUE(J.Get() != nullptr);
 
-  mirror::Class* proxyClass = GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces);
-  ASSERT_TRUE(proxyClass != nullptr);
+  Handle<mirror::Class> proxyClass;
+  {
+    std::vector<mirror::Class*> interfaces;
+    interfaces.push_back(I.Get());
+    interfaces.push_back(J.Get());
+    proxyClass = hs.NewHandle(GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces));
+  }
+
+  ASSERT_TRUE(proxyClass.Get() != nullptr);
   ASSERT_TRUE(proxyClass->IsProxyClass());
   ASSERT_TRUE(proxyClass->IsInitialized());
 
-  mirror::ObjectArray<mirror::ArtField>* instance_fields = proxyClass->GetIFields();
-  EXPECT_TRUE(instance_fields == nullptr);
+  Handle<mirror::ObjectArray<mirror::ArtField>> instance_fields(
+      hs.NewHandle(proxyClass->GetIFields()));
+  EXPECT_TRUE(instance_fields.Get() == nullptr);
 
-  mirror::ObjectArray<mirror::ArtField>* static_fields = proxyClass->GetSFields();
-  ASSERT_TRUE(static_fields != nullptr);
+  Handle<mirror::ObjectArray<mirror::ArtField>> static_fields(
+      hs.NewHandle(proxyClass->GetSFields()));
+  ASSERT_TRUE(static_fields.Get() != nullptr);
   ASSERT_EQ(2, static_fields->GetLength());
 
-  mirror::Class* interfacesFieldClass = class_linker_->FindSystemClass(soa.Self(),
-                                                                       "[Ljava/lang/Class;");
-  ASSERT_TRUE(interfacesFieldClass != nullptr);
-  mirror::Class* throwsFieldClass = class_linker_->FindSystemClass(soa.Self(),
-                                                                   "[[Ljava/lang/Class;");
-  ASSERT_TRUE(throwsFieldClass != nullptr);
+  Handle<mirror::Class> interfacesFieldClass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Class;")));
+  ASSERT_TRUE(interfacesFieldClass.Get() != nullptr);
+  Handle<mirror::Class> throwsFieldClass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Class;")));
+  ASSERT_TRUE(throwsFieldClass.Get() != nullptr);
 
   // Test "Class[] interfaces" field.
-  FieldHelper fh(static_fields->Get(0));
-  EXPECT_EQ("interfaces", std::string(fh.GetName()));
-  EXPECT_EQ("[Ljava/lang/Class;", std::string(fh.GetTypeDescriptor()));
-  EXPECT_EQ(interfacesFieldClass, fh.GetType());
+  FieldHelper fh(hs.NewHandle(static_fields->Get(0)));
+  EXPECT_EQ("interfaces", std::string(fh.GetField()->GetName()));
+  EXPECT_EQ("[Ljava/lang/Class;", std::string(fh.GetField()->GetTypeDescriptor()));
+  EXPECT_EQ(interfacesFieldClass.Get(), fh.GetType());
   EXPECT_EQ("L$Proxy1234;", std::string(fh.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh.IsPrimitiveType());
+  EXPECT_FALSE(fh.GetField()->IsPrimitiveType());
 
   // Test "Class[][] throws" field.
   fh.ChangeField(static_fields->Get(1));
-  EXPECT_EQ("throws", std::string(fh.GetName()));
-  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fh.GetTypeDescriptor()));
-  EXPECT_EQ(throwsFieldClass, fh.GetType());
+  EXPECT_EQ("throws", std::string(fh.GetField()->GetName()));
+  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fh.GetField()->GetTypeDescriptor()));
+  EXPECT_EQ(throwsFieldClass.Get(), fh.GetType());
   EXPECT_EQ("L$Proxy1234;", std::string(fh.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh.IsPrimitiveType());
+  EXPECT_FALSE(fh.GetField()->IsPrimitiveType());
 }
 
 }  // namespace art
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index b9cec40..1034923 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -71,11 +71,13 @@
       DCHECK(method->IsCalleeSaveMethod());
       return true;
     }
-    return HandleTryItems(method);
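+    // Wrap the method in a handle; FindCatchBlock below may suspend and relocate it.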
+    StackHandleScope<1> hs(self_);
+    return HandleTryItems(hs.NewHandle(method));
   }
 
  private:
-  bool HandleTryItems(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HandleTryItems(Handle<mirror::ArtMethod> method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t dex_pc = DexFile::kDexNoIndex;
     if (!method->IsNative()) {
       dex_pc = GetDexPc();
@@ -84,10 +86,11 @@
       bool clear_exception = false;
       StackHandleScope<1> hs(Thread::Current());
       Handle<mirror::Class> to_find(hs.NewHandle((*exception_)->GetClass()));
-      uint32_t found_dex_pc = method->FindCatchBlock(to_find, dex_pc, &clear_exception);
+      uint32_t found_dex_pc = mirror::ArtMethod::FindCatchBlock(method, to_find, dex_pc,
+                                                                &clear_exception);
       exception_handler_->SetClearException(clear_exception);
       if (found_dex_pc != DexFile::kDexNoIndex) {
-        exception_handler_->SetHandlerMethod(method);
+        exception_handler_->SetHandlerMethod(method.Get());
         exception_handler_->SetHandlerDexPc(found_dex_pc);
         exception_handler_->SetHandlerQuickFramePc(method->ToNativePc(found_dex_pc));
         exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
@@ -107,7 +110,8 @@
 };
 
 void QuickExceptionHandler::FindCatch(const ThrowLocation& throw_location,
-                                      mirror::Throwable* exception) {
+                                      mirror::Throwable* exception,
+                                      bool is_exception_reported) {
   DCHECK(!is_deoptimization_);
   if (kDebugExceptionDelivery) {
     mirror::String* msg = exception->GetDetailMessage();
@@ -138,12 +142,24 @@
   } else {
     // Put exception back in root set with clear throw location.
     self_->SetException(ThrowLocation(), exception_ref.Get());
+    self_->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   // The debugger may suspend this thread and walk its stack. Let's do this before popping
   // instrumentation frames.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  instrumentation->ExceptionCaughtEvent(self_, throw_location, handler_method_, handler_dex_pc_,
-                                        exception_ref.Get());
+  if (!is_exception_reported) {
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    instrumentation->ExceptionCaughtEvent(self_, throw_location, handler_method_, handler_dex_pc_,
+                                          exception_ref.Get());
+    // We're not catching this exception here, but record that it has already been reported to
+    // the instrumentation above, to avoid reporting it twice.
+    self_->SetExceptionReportedToInstrumentation(true);
+  }
+  bool caught_exception = (handler_method_ != nullptr && handler_dex_pc_ != DexFile::kDexNoIndex);
+  if (caught_exception) {
+    // We're catching this exception so we finish reporting it. We do it here to avoid doing it
+    // in the compiled code.
+    self_->SetExceptionReportedToInstrumentation(false);
+  }
 }
 
 // Prepares deoptimization.
@@ -175,8 +191,7 @@
 
  private:
   bool HandleDeoptimization(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    MethodHelper mh(m);
-    const DexFile::CodeItem* code_item = mh.GetCodeItem();
+    const DexFile::CodeItem* code_item = m->GetCodeItem();
     CHECK(code_item != nullptr);
     uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
@@ -184,10 +199,11 @@
     uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
     ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, m, new_dex_pc);
     StackHandleScope<2> hs(self_);
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
-    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
-    verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
-                                      &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
+    verifier::MethodVerifier verifier(h_dex_cache->GetDexFile(), &h_dex_cache, &h_class_loader,
+                                      &m->GetClassDef(), code_item, m->GetDexMethodIndex(), m,
                                       m->GetAccessFlags(), false, true, true);
     verifier.Verify();
     std::vector<int32_t> kinds = verifier.DescribeVRegs(dex_pc);
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index a4229b3..1d600ed 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -42,7 +42,8 @@
     LOG(FATAL) << "UNREACHABLE";  // Expected to take long jump.
   }
 
-  void FindCatch(const ThrowLocation& throw_location, mirror::Throwable* exception)
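+  // is_exception_reported indicates whether instrumentation has already been notified of this
+  // exception, so that it is not reported twice.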
+  void FindCatch(const ThrowLocation& throw_location, mirror::Throwable* exception,
+                 bool is_exception_reported)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DeoptimizeStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UpdateInstrumentationStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index c08cc30..fe5e104 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -222,7 +222,7 @@
                                     mirror::Object* receiver,
                                     mirror::ObjectArray<mirror::Object>* args, MethodHelper& mh)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile::TypeList* classes = mh.GetParameterTypeList();
+    const DexFile::TypeList* classes = mh.GetMethod()->GetParameterTypeList();
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
       Append(receiver);
@@ -349,7 +349,7 @@
 
 static void CheckMethodArguments(mirror::ArtMethod* m, uint32_t* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::TypeList* params = MethodHelper(m).GetParameterTypeList();
+  const DexFile::TypeList* params = m->GetParameterTypeList();
   if (params == nullptr) {
     return;  // No arguments so nothing to check.
   }
@@ -359,24 +359,31 @@
   if (!m->IsStatic()) {
     offset = 1;
   }
+  // TODO: If args contains object references, a GC during the calls below may move them (see
+  // the compaction note further down).
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
+  MethodHelper mh(h_m);
   for (uint32_t i = 0; i < num_params; i++) {
     uint16_t type_idx = params->GetTypeItem(i).type_idx_;
-    mirror::Class* param_type = MethodHelper(m).GetClassFromTypeIdx(type_idx);
+    mirror::Class* param_type = mh.GetClassFromTypeIdx(type_idx);
     if (param_type == nullptr) {
-      Thread* self = Thread::Current();
       CHECK(self->IsExceptionPending());
       LOG(ERROR) << "Internal error: unresolvable type for argument type in JNI invoke: "
-          << MethodHelper(m).GetTypeDescriptorFromTypeIdx(type_idx) << "\n"
+          << h_m->GetTypeDescriptorFromTypeIdx(type_idx) << "\n"
           << self->GetException(nullptr)->Dump();
       self->ClearException();
       ++error_count;
     } else if (!param_type->IsPrimitive()) {
       // TODO: check primitives are in range.
+      // TODO: There is a compaction bug here, since GetClassFromTypeIdx can cause thread
+      // suspension. This is hard to fix because args can contain Object*; we would need to save
+      // and restore them using a visitor similar to the ones in the trampoline entrypoints.
       mirror::Object* argument = reinterpret_cast<mirror::Object*>(args[i + offset]);
       if (argument != nullptr && !argument->InstanceOf(param_type)) {
         LOG(ERROR) << "JNI ERROR (app bug): attempt to pass an instance of "
                    << PrettyTypeOf(argument) << " as argument " << (i + 1)
-                   << " to " << PrettyMethod(m);
+                   << " to " << PrettyMethod(h_m.Get());
         ++error_count;
       }
     } else if (param_type->IsPrimitiveLong() || param_type->IsPrimitiveDouble()) {
@@ -387,7 +394,7 @@
     // TODO: pass the JNI function name (such as "CallVoidMethodV") through so we can call JniAbort
     // with an argument.
     JniAbortF(nullptr, "bad arguments passed to %s (see above for details)",
-              PrettyMethod(m).c_str());
+              PrettyMethod(h_m.Get()).c_str());
   }
 }
 
@@ -414,33 +421,36 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
-  MethodHelper mh(method);
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
   JValue result;
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
+  InvokeWithArgArray(soa, method, &arg_array, &result, shorty);
   return result;
 }
 
 JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
-  MethodHelper mh(method);
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
   JValue result;
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
+  InvokeWithArgArray(soa, method, &arg_array, &result, shorty);
   return result;
 }
 
 JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
-  MethodHelper mh(method);
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
   JValue result;
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
+  InvokeWithArgArray(soa, method, &arg_array, &result, shorty);
   return result;
 }
 
@@ -448,11 +458,12 @@
                                            jobject obj, jmethodID mid, va_list args) {
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
-  MethodHelper mh(method);
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
   JValue result;
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
+  InvokeWithArgArray(soa, method, &arg_array, &result, shorty);
   return result;
 }
 
@@ -493,8 +504,7 @@
   // Get our arrays of arguments and their types, and check they're the same size.
   mirror::ObjectArray<mirror::Object>* objects =
       soa.Decode<mirror::ObjectArray<mirror::Object>*>(javaArgs);
-  MethodHelper mh(m);
-  const DexFile::TypeList* classes = mh.GetParameterTypeList();
+  const DexFile::TypeList* classes = m->GetParameterTypeList();
   uint32_t classes_size = (classes == nullptr) ? 0 : classes->Size();
   uint32_t arg_count = (objects != nullptr) ? objects->GetLength() : 0;
   if (arg_count != classes_size) {
@@ -513,13 +523,17 @@
 
   // Invoke the method.
   JValue result;
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  uint32_t shorty_len = 0;
+  const char* shorty = m->GetShorty(&shorty_len);
+  ArgArray arg_array(shorty, shorty_len);
+  StackHandleScope<1> hs(soa.Self());
+  MethodHelper mh(hs.NewHandle(m));
   if (!arg_array.BuildArgArrayFromObjectArray(soa, receiver, objects, mh)) {
     CHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
 
-  InvokeWithArgArray(soa, m, &arg_array, &result, mh.GetShorty());
+  InvokeWithArgArray(soa, m, &arg_array, &result, shorty);
 
   // Wrap any exception with "Ljava/lang/reflect/InvocationTargetException;" and return early.
   if (soa.Self()->IsExceptionPending()) {
@@ -801,6 +815,10 @@
 bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) {
   NthCallerVisitor visitor(Thread::Current(), 2);
   visitor.WalkStack();
+  if (UNLIKELY(visitor.caller == nullptr)) {
+    // The caller is an attached native thread.
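+    // With no managed frames on the stack, there is no caller class; grant only public access.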
+    return (access_flags & kAccPublic) != 0;
+  }
   mirror::Class* caller_class = visitor.caller->GetDeclaringClass();
 
   if (((access_flags & kAccPublic) != 0) || (caller_class == declaring_class)) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index f16eddf..717381c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -18,7 +18,9 @@
 
 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
 #include <sys/mount.h>
+#ifdef __linux__
 #include <linux/fs.h>
+#endif
 
 #include <signal.h>
 #include <sys/syscall.h>
@@ -44,6 +46,7 @@
 #include "atomic.h"
 #include "class_linker.h"
 #include "debugger.h"
+#include "fault_handler.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
@@ -306,7 +309,6 @@
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
-  Dbg::UpdateObjectPointers(visitor, arg);
 }
 
 bool Runtime::Create(const Options& options, bool ignore_unrecognized) {
@@ -436,6 +438,7 @@
 
 // Do zygote-mode-only initialization.
 bool Runtime::InitZygote() {
+#ifdef __linux__
   // zygote goes into its own process group
   setpgid(0, 0);
 
@@ -466,6 +469,10 @@
   }
 
   return true;
+#else
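+  // Zygote setup relies on Linux-specific process and mount configuration and is not supported
+  // on other host platforms.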
+  UNIMPLEMENTED(FATAL);
+  return false;
+#endif
 }
 
 void Runtime::DidForkFromZygote() {
@@ -567,7 +574,7 @@
       break;
   }
 
-  if (implicit_checks_supported &&
+  if (!options->interpreter_only_ && implicit_checks_supported &&
       (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
           ParsedOptions::kExplicitNullCheck |
           ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
@@ -1035,14 +1042,12 @@
   monitor_list_->DisallowNewMonitors();
   intern_table_->DisallowNewInterns();
   java_vm_->DisallowNewWeakGlobals();
-  Dbg::DisallowNewObjectRegistryObjects();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
   intern_table_->AllowNewInterns();
   java_vm_->AllowNewWeakGlobals();
-  Dbg::AllowNewObjectRegistryObjects();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 808d706..8776a59 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -25,26 +25,21 @@
 #include <utility>
 #include <vector>
 
-#include "base/macros.h"
-#include "base/stringpiece.h"
-#include "gc/collector_type.h"
-#include "gc/heap.h"
-#include "globals.h"
-#include "instruction_set.h"
 #include "instrumentation.h"
+#include "instruction_set.h"
 #include "jobject_comparator.h"
 #include "object_callbacks.h"
+#include "offsets.h"
 #include "profiler_options.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
-#include "fault_handler.h"
 
 namespace art {
 
 namespace gc {
   class Heap;
-}
+}  // namespace gc
 namespace mirror {
   class ArtMethod;
   class ClassLoader;
@@ -65,7 +60,10 @@
 class JavaVMExt;
 class MonitorList;
 class MonitorPool;
+class NullPointerHandler;
 class SignalCatcher;
+class StackOverflowHandler;
+class SuspensionHandler;
 class ThreadList;
 class Trace;
 class Transaction;
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 960d332..46ee274 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -327,7 +327,7 @@
     while (true) {
     }
   }
-
+#ifdef __linux__
   // Remove our signal handler for this signal...
   struct sigaction action;
   memset(&action, 0, sizeof(action));
@@ -336,6 +336,9 @@
   sigaction(signal_number, &action, NULL);
   // ...and re-raise so we die with the appropriate status.
   kill(getpid(), signal_number);
+#else
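+  // On non-Linux hosts, skip re-raising the signal and exit with a failure status instead.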
+  exit(EXIT_FAILURE);
+#endif
 }
 
 void Runtime::InitPlatformSignalHandlers() {
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index d56495e..7ce68c6 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 
 #include "base/casts.h"
+#include "jni_internal-inl.h"
 #include "thread-inl.h"
 #include "verify_object.h"
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index ef09816..7e922c5 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -40,7 +40,7 @@
   } else if (m->IsNative()) {
     return GetVRegReference(0);
   } else {
-    const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+    const DexFile::CodeItem* code_item = m->GetCodeItem();
     CHECK(code_item != NULL) << PrettyMethod(m);
     uint16_t reg = code_item->registers_size_ - code_item->ins_size_;
     return GetVRegReference(reg);
@@ -125,7 +125,7 @@
       return cur_shadow_frame_->GetVRegReference(0);
     }
   } else {
-    const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+    const DexFile::CodeItem* code_item = m->GetCodeItem();
     if (code_item == NULL) {
       UNIMPLEMENTED(ERROR) << "Failed to determine this object of abstract or proxy method: "
           << PrettyMethod(m);
@@ -157,7 +157,7 @@
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       return GetGPR(vmap_table.ComputeRegister(spill_mask, vmap_offset, kind));
     } else {
-      const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+      const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
       return *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                           frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
@@ -184,7 +184,7 @@
       const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kReferenceVReg);
       SetGPR(reg, new_value);
     } else {
-      const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+      const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
       int offset = GetVRegOffset(code_item, frame_info.CoreSpillMask(), frame_info.FpSpillMask(),
                                  frame_info.FrameSizeInBytes(), vreg, kRuntimeISA);
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index fc886d5..b1180bd 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -21,9 +21,11 @@
 
 #include <pthread.h>
 
+#include "cutils/atomic-inline.h"
+
 #include "base/casts.h"
 #include "base/mutex-inl.h"
-#include "cutils/atomic-inline.h"
+#include "gc/heap.h"
 #include "jni_internal.h"
 
 namespace art {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3bfdc3f..6980530 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -495,7 +495,9 @@
   }
 
   // TODO: move this into the Linux GetThreadStack implementation.
-#if !defined(__APPLE__)
+#if defined(__APPLE__)
+  bool is_main_thread = false;
+#else
   // If we're the main thread, check whether we were run with an unlimited stack. In that case,
   // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
   // will be broken because we'll die long before we get close to 2GB.
@@ -892,8 +894,7 @@
       if (m->IsNative()) {
         os << "(Native method)";
       } else {
-        mh.ChangeMethod(m);
-        const char* source_file(mh.GetDeclaringClassSourceFile());
+        const char* source_file(m->GetDeclaringClassSourceFile());
         os << "(" << (source_file != nullptr ? source_file : "unavailable")
            << ":" << line_number << ")";
       }
@@ -933,7 +934,7 @@
   std::ostream& os;
   const Thread* thread;
   const bool can_allocate;
-  MethodHelper mh;
+  mirror::ArtMethod* method;
   mirror::ArtMethod* last_method;
   int last_line_number;
   int repetition_count;
@@ -1530,7 +1531,6 @@
           soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal);
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
-    MethodHelper mh(method);
     int32_t line_number;
     StackHandleScope<3> hs(soa.Self());
     auto class_name_object(hs.NewHandle<mirror::String>(nullptr));
@@ -1542,17 +1542,17 @@
     } else {
       mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
       uint32_t dex_pc = pc_trace->Get(i);
-      line_number = mh.GetLineNumFromDexPC(dex_pc);
+      line_number = method->GetLineNumFromDexPC(dex_pc);
       // Allocate element, potentially triggering GC
       // TODO: reuse class_name_object via Class::name_?
-      const char* descriptor = mh.GetDeclaringClassDescriptor();
+      const char* descriptor = method->GetDeclaringClassDescriptor();
       CHECK(descriptor != nullptr);
       std::string class_name(PrettyDescriptor(descriptor));
       class_name_object.Assign(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
       if (class_name_object.Get() == nullptr) {
         return nullptr;
       }
-      const char* source_file = mh.GetDeclaringClassSourceFile();
+      const char* source_file = method->GetDeclaringClassSourceFile();
       if (source_file != nullptr) {
         source_name_object.Assign(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
         if (source_name_object.Get() == nullptr) {
@@ -1560,7 +1560,7 @@
         }
       }
     }
-    const char* method_name = mh.GetName();
+    const char* method_name = method->GetName();
     CHECK(method_name != nullptr);
     Handle<mirror::String> method_name_object(
         hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), method_name)));
@@ -1613,6 +1613,7 @@
   Handle<mirror::ArtMethod> saved_throw_method(hs.NewHandle(throw_location.GetMethod()));
   // Ignore the cause throw location. TODO: should we report this as a re-throw?
   ScopedLocalRef<jobject> cause(GetJniEnv(), soa.AddLocalReference<jobject>(GetException(nullptr)));
+  bool is_exception_reported = IsExceptionReportedToInstrumentation();
   ClearException();
   Runtime* runtime = Runtime::Current();
 
@@ -1643,6 +1644,7 @@
     ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    SetExceptionReportedToInstrumentation(is_exception_reported);
     return;
   }
 
@@ -1695,6 +1697,7 @@
     ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, exception.Get());
+    SetExceptionReportedToInstrumentation(is_exception_reported);
   } else {
     jvalue jv_args[2];
     size_t i = 0;
@@ -1712,6 +1715,7 @@
       ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                            throw_location.GetDexPc());
       SetException(gc_safe_throw_location, exception.Get());
+      SetExceptionReportedToInstrumentation(is_exception_reported);
     }
   }
 }
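
The three SetException hunks above repeat the same save/restore pattern because
ClearException() (see the thread.h hunk further down) now also resets the
reported-to-instrumentation flag; losing it on a rethrow path could make
instrumentation see the same exception twice. A standalone sketch of that
invariant, using simplified stand-in types rather than the real art::Thread:

    #include <cassert>

    struct Thread {
      bool exception_pending = false;
      bool reported_to_instrumentation = false;

      void SetException(bool reported) {
        exception_pending = true;
        reported_to_instrumentation = reported;
      }
      void ClearException() {
        exception_pending = false;
        reported_to_instrumentation = false;  // mirrors the thread.h change
      }
    };

    int main() {
      Thread t;
      t.SetException(/*reported=*/true);
      bool saved = t.reported_to_instrumentation;  // save before clearing
      t.ClearException();
      t.SetException(saved);  // restore, as the hunks above do
      assert(t.reported_to_instrumentation);
      return 0;
    }
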
@@ -1864,7 +1868,6 @@
   QUICK_ENTRY_POINT_INFO(pShrLong)
   QUICK_ENTRY_POINT_INFO(pUshrLong)
   QUICK_ENTRY_POINT_INFO(pIndexOf)
-  QUICK_ENTRY_POINT_INFO(pMemcmp16)
   QUICK_ENTRY_POINT_INFO(pStringCompareTo)
   QUICK_ENTRY_POINT_INFO(pMemcpy)
   QUICK_ENTRY_POINT_INFO(pQuickImtConflictTrampoline)
@@ -1895,13 +1898,14 @@
   CHECK(exception != nullptr);
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
+  bool is_exception_reported = IsExceptionReportedToInstrumentation();
   ClearException();
   bool is_deoptimization = (exception == GetDeoptimizationException());
   QuickExceptionHandler exception_handler(this, is_deoptimization);
   if (is_deoptimization) {
     exception_handler.DeoptimizeStack();
   } else {
-    exception_handler.FindCatch(throw_location, exception);
+    exception_handler.FindCatch(throw_location, exception, is_exception_reported);
   }
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
@@ -2044,8 +2048,7 @@
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
       const uint8_t* native_gc_map = m->GetNativeGcMap();
       CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-      mh_.ChangeMethod(m);
-      const DexFile::CodeItem* code_item = mh_.GetCodeItem();
+      const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be nullptr or how would we compile its instructions?
       NativePcOffsetToReferenceMap map(native_gc_map);
       size_t num_regs = std::min(map.RegWidth() * 8,
@@ -2100,9 +2103,6 @@
 
   // Visitor for when we visit a root.
   const RootVisitor& visitor_;
-
-  // A method helper we keep around to avoid dex file/cache re-computations.
-  MethodHelper mh_;
 };
 
 class RootCallbackVisitor {
diff --git a/runtime/thread.h b/runtime/thread.h
index 5de54b3..bff9b52 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -329,6 +329,7 @@
   void ClearException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
     tlsPtr_.throw_location.Clear();
+    SetExceptionReportedToInstrumentation(false);
   }
 
  // Find catch block and perform long jump to appropriate exception handler
@@ -809,6 +810,14 @@
     tlsPtr_.rosalloc_runs[index] = run;
   }
 
+  bool IsExceptionReportedToInstrumentation() const {
+    return tls32_.is_exception_reported_to_instrumentation_;
+  }
+
+  void SetExceptionReportedToInstrumentation(bool reported) {
+    tls32_.is_exception_reported_to_instrumentation_ = reported;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -911,7 +920,7 @@
     explicit tls_32bit_sized_values(bool is_daemon) :
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
-      thread_exit_check_count(0) {
+      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -947,6 +956,10 @@
 
     // How many times has our pthread key's destructor been called?
     uint32_t thread_exit_check_count;
+
+    // When true, this field indicates that the exception associated with this
+    // thread has already been reported to instrumentation.
+    bool32_t is_exception_reported_to_instrumentation_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index 06b6e8d..a1347a4 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -27,7 +27,7 @@
 std::string ThrowLocation::Dump() const {
   if (method_ != nullptr) {
     return StringPrintf("%s:%d", PrettyMethod(method_).c_str(),
-                        MethodHelper(method_).GetLineNumFromDexPC(dex_pc_));
+                        method_->GetLineNumFromDexPC(dex_pc_));
   } else {
     return "unknown throw location";
   }
diff --git a/runtime/trace.cc b/runtime/trace.cc
index d53b369..032a566 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -677,12 +677,10 @@
 }
 
 void Trace::DumpMethodList(std::ostream& os, const std::set<mirror::ArtMethod*>& visited_methods) {
-  MethodHelper mh;
   for (const auto& method : visited_methods) {
-    mh.ChangeMethod(method);
     os << StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
-        PrettyDescriptor(mh.GetDeclaringClassDescriptor()).c_str(), mh.GetName(),
-        mh.GetSignature().ToString().c_str(), mh.GetDeclaringClassSourceFile());
+        PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
+        method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
   }
 }
 
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 3645ed2..a03b389 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -115,48 +115,48 @@
   // Lookup fields.
   mirror::ArtField* booleanField = h_klass->FindDeclaredStaticField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
-  ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
+  ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_klass.Get()), false);
 
   mirror::ArtField* byteField = h_klass->FindDeclaredStaticField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
-  ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
+  ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_klass.Get()), 0);
 
   mirror::ArtField* charField = h_klass->FindDeclaredStaticField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
-  ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
+  ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_klass.Get()), 0u);
 
   mirror::ArtField* shortField = h_klass->FindDeclaredStaticField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
-  ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
+  ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_klass.Get()), 0);
 
   mirror::ArtField* intField = h_klass->FindDeclaredStaticField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
-  ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_klass.Get()), 0);
 
   mirror::ArtField* longField = h_klass->FindDeclaredStaticField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
-  ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
+  ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_klass.Get()), static_cast<int64_t>(0));
 
   mirror::ArtField* floatField = h_klass->FindDeclaredStaticField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
-  ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
+  ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_EQ(floatField->GetFloat(h_klass.Get()), static_cast<float>(0.0f));
 
   mirror::ArtField* doubleField = h_klass->FindDeclaredStaticField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
-  ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
+  ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_EQ(doubleField->GetDouble(h_klass.Get()), static_cast<double>(0.0));
 
   mirror::ArtField* objectField = h_klass->FindDeclaredStaticField("objectField",
                                                                       "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
-  ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+  ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   ASSERT_EQ(objectField->GetObject(h_klass.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
@@ -214,48 +214,48 @@
   // Lookup fields.
   mirror::ArtField* booleanField = h_klass->FindDeclaredInstanceField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
-  ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
+  ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_instance.Get()), false);
 
   mirror::ArtField* byteField = h_klass->FindDeclaredInstanceField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
-  ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
+  ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_instance.Get()), 0);
 
   mirror::ArtField* charField = h_klass->FindDeclaredInstanceField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
-  ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
+  ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_instance.Get()), 0u);
 
   mirror::ArtField* shortField = h_klass->FindDeclaredInstanceField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
-  ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
+  ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_instance.Get()), 0);
 
   mirror::ArtField* intField = h_klass->FindDeclaredInstanceField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
-  ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_instance.Get()), 0);
 
   mirror::ArtField* longField = h_klass->FindDeclaredInstanceField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
-  ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
+  ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_instance.Get()), static_cast<int64_t>(0));
 
   mirror::ArtField* floatField = h_klass->FindDeclaredInstanceField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
-  ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
+  ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_EQ(floatField->GetFloat(h_instance.Get()), static_cast<float>(0.0f));
 
   mirror::ArtField* doubleField = h_klass->FindDeclaredInstanceField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
-  ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
+  ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_EQ(doubleField->GetDouble(h_instance.Get()), static_cast<double>(0.0));
 
   mirror::ArtField* objectField = h_klass->FindDeclaredInstanceField("objectField",
                                                                         "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
-  ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+  ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   ASSERT_EQ(objectField->GetObject(h_instance.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f562252..f60f795 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -290,15 +290,15 @@
   if (f == NULL) {
     return "null";
   }
-  FieldHelper fh(f);
   std::string result;
   if (with_type) {
-    result += PrettyDescriptor(fh.GetTypeDescriptor());
+    result += PrettyDescriptor(f->GetTypeDescriptor());
     result += ' ';
   }
-  result += PrettyDescriptor(fh.GetDeclaringClassDescriptor());
+  StackHandleScope<1> hs(Thread::Current());
+  result += PrettyDescriptor(FieldHelper(hs.NewHandle(f)).GetDeclaringClassDescriptor());
   result += '.';
-  result += fh.GetName();
+  result += f->GetName();
   return result;
 }
 
@@ -365,15 +365,14 @@
   if (m == nullptr) {
     return "null";
   }
-  MethodHelper mh(m);
-  std::string result(PrettyDescriptor(mh.GetDeclaringClassDescriptor()));
+  std::string result(PrettyDescriptor(m->GetDeclaringClassDescriptor()));
   result += '.';
-  result += mh.GetName();
+  result += m->GetName();
   if (UNLIKELY(m->IsFastNative())) {
     result += "!";
   }
   if (with_signature) {
-    const Signature signature = mh.GetSignature();
+    const Signature signature = m->GetSignature();
     std::string sig_as_string(signature.ToString());
     if (signature == Signature::NoSignature()) {
       return result + sig_as_string;
@@ -552,6 +551,18 @@
   }
 }
 
+std::string PrintableChar(uint16_t ch) {
+  std::string result;
+  result += '\'';
+  if (NeedsEscaping(ch)) {
+    StringAppendF(&result, "\\u%04x", ch);
+  } else {
+    result += ch;
+  }
+  result += '\'';
+  return result;
+}
+
 std::string PrintableString(const std::string& utf) {
   std::string result;
   result += '"';
@@ -630,15 +641,14 @@
 }
 
 std::string JniShortName(mirror::ArtMethod* m) {
-  MethodHelper mh(m);
-  std::string class_name(mh.GetDeclaringClassDescriptor());
+  std::string class_name(m->GetDeclaringClassDescriptor());
   // Remove the leading 'L' and trailing ';'...
   CHECK_EQ(class_name[0], 'L') << class_name;
   CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
   class_name.erase(0, 1);
   class_name.erase(class_name.size() - 1, 1);
 
-  std::string method_name(mh.GetName());
+  std::string method_name(m->GetName());
 
   std::string short_name;
   short_name += "Java_";
@@ -653,7 +663,7 @@
   long_name += JniShortName(m);
   long_name += "__";
 
-  std::string signature(MethodHelper(m).GetSignature().ToString());
+  std::string signature(m->GetSignature().ToString());
   signature.erase(0, 1);
   signature.erase(signature.begin() + signature.find(')'), signature.end());
 
@@ -1044,6 +1054,7 @@
   if (current_method != nullptr) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
+#ifdef __linux__
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
   if (!backtrace->Unwind(0)) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
@@ -1085,6 +1096,7 @@
     }
     os << "\n";
   }
+#endif
 }
 
 #if defined(__APPLE__)
diff --git a/runtime/utils.h b/runtime/utils.h
index 7be5d44..6d52459 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -23,7 +23,6 @@
 #include <vector>
 
 #include "base/logging.h"
-#include "base/stringprintf.h"
 #include "globals.h"
 #include "instruction_set.h"
 #include "primitive.h"
@@ -141,9 +140,8 @@
 template<typename T>
 static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) {
   return
-      // DCHECK(IsPowerOfTwo(n)) in a form acceptable in a constexpr function:
-      (kIsDebugBuild && !IsPowerOfTwo(n)) ? (LOG(FATAL) << n << " isn't a power of 2", T(0))
-      : (x & -n);
+      DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0))
+      (x & -n);
 }
 
 template<typename T>
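
DCHECK_CONSTEXPR packages the same constexpr-compatible assertion as the old
hand-written form: n must be a power of two, in which case x & -n rounds x
down to a multiple of n. A minimal standalone illustration of that contract
(not ART code):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t RoundDown(uint32_t x, uint32_t n) {
      return x & -n;  // valid only when n is a power of two
    }

    int main() {
      assert(RoundDown(13, 4) == 12);  // clears the low log2(n) bits
      assert(RoundDown(16, 4) == 16);  // multiples of n are unchanged
      return 0;
    }
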
@@ -205,17 +203,7 @@
   return (ch < ' ' || ch > '~');
 }
 
-static inline std::string PrintableChar(uint16_t ch) {
-  std::string result;
-  result += '\'';
-  if (NeedsEscaping(ch)) {
-    StringAppendF(&result, "\\u%04x", ch);
-  } else {
-    result += ch;
-  }
-  result += '\'';
-  return result;
-}
+std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
 // Java escapes are used for non-ASCII characters.
diff --git a/runtime/verifier/instruction_flags.h b/runtime/verifier/instruction_flags.h
index e50ba13..f8abca0 100644
--- a/runtime/verifier/instruction_flags.h
+++ b/runtime/verifier/instruction_flags.h
@@ -17,11 +17,11 @@
 #ifndef ART_RUNTIME_VERIFIER_INSTRUCTION_FLAGS_H_
 #define ART_RUNTIME_VERIFIER_INSTRUCTION_FLAGS_H_
 
-#include "base/logging.h"
-
 #include <stdint.h>
 #include <string>
 
+#include "base/logging.h"
+
 namespace art {
 namespace verifier {
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index b5c07aa..2f4e805 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -274,7 +274,7 @@
     result = kHardFailure;
   }
   uint64_t duration_ns = NanoTime() - start_ns;
-  if (duration_ns > MsToNs(100)) {
+  if (duration_ns > MsToNs(100) && !kIsDebugBuild) {
     LOG(WARNING) << "Verification of " << PrettyMethod(method_idx, *dex_file)
                  << " took " << PrettyDuration(duration_ns);
   }
@@ -338,12 +338,11 @@
 
 void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
                                       std::vector<uint32_t>* monitor_enter_dex_pcs) {
-  MethodHelper mh(m);
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
-  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
-                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
+  MethodVerifier verifier(m->GetDexFile(), &dex_cache, &class_loader, &m->GetClassDef(),
+                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
                           true, false);
   verifier.interesting_dex_pc_ = dex_pc;
   verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs;
@@ -363,12 +362,11 @@
 
 mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
                                                            uint32_t dex_pc) {
-  MethodHelper mh(m);
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
-  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
-                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
+  MethodVerifier verifier(m->GetDexFile(), &dex_cache, &class_loader, &m->GetClassDef(),
+                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
                           true, false);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
@@ -394,12 +392,11 @@
 
 mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m,
                                                             uint32_t dex_pc) {
-  MethodHelper mh(m);
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
-  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
-                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
+  MethodVerifier verifier(m->GetDexFile(), &dex_cache, &class_loader, &m->GetClassDef(),
+                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
                           true, false);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
@@ -507,6 +504,11 @@
   return *failure_message;
 }
 
+std::ostream& MethodVerifier::LogVerifyInfo() {
+  return info_messages_ << "VFY: " << PrettyMethod(dex_method_idx_, *dex_file_)
+                        << '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
+}
+
 void MethodVerifier::PrependToLastFailMessage(std::string prepend) {
   size_t failure_num = failure_messages_.size();
   DCHECK_NE(failure_num, 0U);
@@ -729,6 +731,10 @@
       result = false;
       break;
   }
+  if (inst->GetVerifyIsRuntimeOnly() && Runtime::Current()->IsCompiler()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "opcode only expected at runtime " << inst->Name();
+    result = false;
+  }
   return result;
 }
 
@@ -2131,19 +2137,22 @@
     case Instruction::INVOKE_SUPER_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      bool is_super =  (inst->Opcode() == Instruction::INVOKE_SUPER ||
-                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      mirror::ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL,
-                                                                   is_range, is_super);
+      bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
+                       inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
+      mirror::ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range,
+                                                              is_super);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
-        MethodHelper mh(called_method);
+        Thread* self = Thread::Current();
+        StackHandleScope<1> hs(self);
+        Handle<mirror::ArtMethod> h_called_method(hs.NewHandle(called_method));
+        MethodHelper mh(h_called_method);
         mirror::Class* return_type_class = mh.GetReturnType(can_load_classes_);
         if (return_type_class != nullptr) {
-          return_type = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+          return_type = &reg_types_.FromClass(h_called_method->GetReturnTypeDescriptor(),
+                                              return_type_class,
                                               return_type_class->CannotBeAssignedFromOtherTypes());
         } else {
-          Thread* self = Thread::Current();
           DCHECK(!can_load_classes_ || self->IsExceptionPending());
           self->ClearException();
         }
@@ -2178,7 +2187,7 @@
         return_type_descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
       } else {
         is_constructor = called_method->IsConstructor();
-        return_type_descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
+        return_type_descriptor = called_method->GetReturnTypeDescriptor();
       }
       if (is_constructor) {
         /*
@@ -2244,10 +2253,10 @@
           uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
           descriptor = dex_file_->StringByTypeIdx(return_type_idx);
         } else {
-          descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
+          descriptor = called_method->GetReturnTypeDescriptor();
         }
-        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
-                                                                false);
+        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
+                                                               false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
         } else {
@@ -2302,7 +2311,7 @@
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
       } else {
-        descriptor = MethodHelper(abs_method).GetReturnTypeDescriptor();
+        descriptor = abs_method->GetReturnTypeDescriptor();
       }
       const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
                                                              false);
@@ -2569,7 +2578,7 @@
       bool is_range = (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK);
       mirror::ArtMethod* called_method = VerifyInvokeVirtualQuickArgs(inst, is_range);
       if (called_method != NULL) {
-        const char* descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
+        const char* descriptor = called_method->GetReturnTypeDescriptor();
         const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
                                                                false);
         if (!return_type.IsLowHalf()) {
@@ -2957,7 +2966,7 @@
     return NULL;
   }
   // Disallow any calls to class initializers.
-  if (MethodHelper(res_method).IsClassInitializer()) {
+  if (res_method->IsClassInitializer()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "rejecting call to class initializer "
                                       << PrettyMethod(res_method);
     return NULL;
@@ -3004,6 +3013,26 @@
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+
+  // As the method may not have been resolved, make this static check against what we expect.
+  const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
+  uint32_t shorty_idx = dex_file_->GetProtoId(method_id.proto_idx_).shorty_idx_;
+  uint32_t shorty_len;
+  const char* descriptor = dex_file_->StringDataAndUtf16LengthByIdx(shorty_idx, &shorty_len);
+  int32_t sig_registers = method_type == METHOD_STATIC ? 0 : 1;
+  for (size_t i = 1; i < shorty_len; i++) {
+    if (descriptor[i] == 'J' || descriptor[i] == 'D') {
+      sig_registers += 2;
+    } else {
+      sig_registers++;
+    }
+  }
+  if (inst->VRegA() != sig_registers) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << sig_registers <<
+        " arguments, found " << inst->VRegA();
+    return nullptr;
+  }
+
   mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == NULL) {  // error or class is unresolved
     return NULL;
@@ -3022,12 +3051,11 @@
     }
     mirror::Class* super_klass = super.GetClass();
     if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) {
-      MethodHelper mh(res_method);
       Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
                                    << PrettyMethod(dex_method_idx_, *dex_file_)
                                    << " to super " << super
-                                   << "." << mh.GetName()
-                                   << mh.GetSignature();
+                                   << "." << res_method->GetName()
+                                   << res_method->GetSignature();
       return NULL;
     }
   }
@@ -3076,8 +3104,7 @@
    * Process the target method's signature. This signature may or may not
    * have been verified, so we can't assume it's properly formed.
    */
-  MethodHelper mh(res_method);
-  const DexFile::TypeList* params = mh.GetParameterTypeList();
+  const DexFile::TypeList* params = res_method->GetParameterTypeList();
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
@@ -3091,7 +3118,7 @@
       return NULL;
     }
     const char* descriptor =
-        mh.GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
+        res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
     if (descriptor == NULL) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
           << " missing signature component";
@@ -3199,8 +3226,7 @@
    * Process the target method's signature. This signature may or may not
    * have been verified, so we can't assume it's properly formed.
    */
-  MethodHelper mh(res_method);
-  const DexFile::TypeList* params = mh.GetParameterTypeList();
+  const DexFile::TypeList* params = res_method->GetParameterTypeList();
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
@@ -3216,7 +3242,7 @@
       return NULL;
     }
     const char* descriptor =
-        mh.GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
+        res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
     if (descriptor == NULL) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
                                         << " missing signature component";
@@ -3513,13 +3539,17 @@
   }
   const RegType* field_type = nullptr;
   if (field != NULL) {
-    FieldHelper fh(field);
-    mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+    Thread* self = Thread::Current();
+    mirror::Class* field_type_class;
+    {
+      StackHandleScope<1> hs(self);
+      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+      field_type_class = FieldHelper(h_field).GetType(can_load_classes_);
+    }
     if (field_type_class != nullptr) {
-      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+      field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                          field_type_class->CannotBeAssignedFromOtherTypes());
     } else {
-      Thread* self = Thread::Current();
       DCHECK(!can_load_classes_ || self->IsExceptionPending());
       self->ClearException();
     }
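
These verifier hunks move to StackHandleScope/HandleWrapper because
FieldHelper::GetType() may load classes and therefore trigger GC; the wrapper
roots the caller's raw pointer and writes the possibly relocated address back
when the scope exits. A deliberately simplified standalone model of that
write-back, with an explicit Relocate() standing in for a moving collector:

    #include <cassert>

    template <typename T>
    class HandleWrapper {
     public:
      explicit HandleWrapper(T** ptr) : ptr_(ptr), obj_(*ptr) {}
      ~HandleWrapper() { *ptr_ = obj_; }  // write back on scope exit
      void Relocate(T* new_addr) { obj_ = new_addr; }  // "GC" moved the object
     private:
      T** const ptr_;
      T* obj_;
    };

    int main() {
      int a = 1, b = 1;
      int* field = &a;
      {
        HandleWrapper<int> h(&field);
        h.Relocate(&b);  // simulate a moving collection during the scope
      }
      assert(field == &b);  // caller's raw pointer was updated
      return 0;
    }
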
@@ -3580,10 +3610,15 @@
                                       << " from other class " << GetDeclaringClass();
       return;
     }
-    FieldHelper fh(field);
-    mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+    mirror::Class* field_type_class;
+    {
+      StackHandleScope<1> hs(Thread::Current());
+      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+      FieldHelper fh(h_field);
+      field_type_class = fh.GetType(can_load_classes_);
+    }
     if (field_type_class != nullptr) {
-      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+      field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                          field_type_class->CannotBeAssignedFromOtherTypes());
     } else {
       Thread* self = Thread::Current();
@@ -3643,18 +3678,23 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
   }
-  FieldHelper fh(field);
-  mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+  mirror::Class* field_type_class;
+  {
+    StackHandleScope<1> hs(Thread::Current());
+    HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+    FieldHelper fh(h_field);
+    field_type_class = fh.GetType(can_load_classes_);
+  }
   const RegType* field_type;
   if (field_type_class != nullptr) {
-    field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+    field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                        field_type_class->CannotBeAssignedFromOtherTypes());
   } else {
     Thread* self = Thread::Current();
     DCHECK(!can_load_classes_ || self->IsExceptionPending());
     self->ClearException();
     field_type = &reg_types_.FromDescriptor(field->GetDeclaringClass()->GetClassLoader(),
-                                            fh.GetTypeDescriptor(), false);
+                                            field->GetTypeDescriptor(), false);
   }
   DCHECK(field_type != nullptr);
   const uint32_t vregA = inst->VRegA_22c();
@@ -3698,7 +3738,7 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
   }
-  const char* descriptor = FieldHelper(field).GetTypeDescriptor();
+  const char* descriptor = field->GetTypeDescriptor();
   mirror::ClassLoader* loader = field->GetDeclaringClass()->GetClassLoader();
   const RegType& field_type = reg_types_.FromDescriptor(loader, descriptor, false);
   if (field != NULL) {
@@ -3830,13 +3870,18 @@
 const RegType& MethodVerifier::GetMethodReturnType() {
   if (return_type_ == nullptr) {
     if (mirror_method_ != NULL) {
-      MethodHelper mh(mirror_method_);
-      mirror::Class* return_type_class = mh.GetReturnType(can_load_classes_);
+      Thread* self = Thread::Current();
+      StackHandleScope<1> hs(self);
+      mirror::Class* return_type_class;
+      {
+        HandleWrapper<mirror::ArtMethod> h_mirror_method(hs.NewHandleWrapper(&mirror_method_));
+        return_type_class = MethodHelper(h_mirror_method).GetReturnType(can_load_classes_);
+      }
       if (return_type_class != nullptr) {
-        return_type_ = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+        return_type_ = &reg_types_.FromClass(mirror_method_->GetReturnTypeDescriptor(),
+                                             return_type_class,
                                              return_type_class->CannotBeAssignedFromOtherTypes());
       } else {
-        Thread* self = Thread::Current();
         DCHECK(!can_load_classes_ || self->IsExceptionPending());
         self->ClearException();
       }
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index a23e80d..451c9e2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -29,9 +29,8 @@
 #include "dex_instruction.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
-#include "mirror/object.h"
 #include "reg_type.h"
-#include "reg_type_cache-inl.h"
+#include "reg_type_cache.h"
 #include "register_line.h"
 #include "safe_map.h"
 
@@ -170,10 +169,7 @@
   std::ostream& Fail(VerifyError error);
 
   // Log for verification information.
-  std::ostream& LogVerifyInfo() {
-    return info_messages_ << "VFY: " << PrettyMethod(dex_method_idx_, *dex_file_)
-                          << '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
-  }
+  std::ostream& LogVerifyInfo();
 
   // Dump the failures encountered by the verifier.
   std::ostream& DumpFailures(std::ostream& os);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index a23b8c4..64001d3 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -17,18 +17,18 @@
 #ifndef ART_RUNTIME_VERIFIER_REG_TYPE_H_
 #define ART_RUNTIME_VERIFIER_REG_TYPE_H_
 
-#include "base/macros.h"
-#include "globals.h"
-#include "object_callbacks.h"
-#include "primitive.h"
-
-#include "jni.h"
-
 #include <limits>
 #include <stdint.h>
 #include <set>
 #include <string>
 
+#include "jni.h"
+
+#include "base/macros.h"
+#include "globals.h"
+#include "object_callbacks.h"
+#include "primitive.h"
+
 namespace art {
 namespace mirror {
 class Class;
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index b3a2847..0989cd0 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -18,7 +18,9 @@
 #define ART_RUNTIME_VERIFIER_REGISTER_LINE_INL_H_
 
 #include "register_line.h"
+
 #include "method_verifier.h"
+#include "reg_type_cache-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index a3e3e3b..d21f39b 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -16,6 +16,7 @@
 
 #include "register_line.h"
 
+#include "base/stringprintf.h"
 #include "dex_instruction-inl.h"
 #include "method_verifier.h"
 #include "register_line-inl.h"
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index dade203..57c7517 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -51,7 +51,7 @@
 class RegisterLine {
  public:
   static RegisterLine* Create(size_t num_regs, MethodVerifier* verifier) {
-    uint8_t* memory = new uint8_t[sizeof(RegisterLine) + (num_regs * sizeof(uint16_t))];
+    void* memory = operator new(sizeof(RegisterLine) + (num_regs * sizeof(uint16_t)));
     RegisterLine* rl = new (memory) RegisterLine(num_regs, verifier);
     return rl;
   }
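
Requesting raw storage with operator new keeps the allocation form matched to
the placement-new that follows (and to an eventual operator delete), instead
of constructing inside a uint8_t array obtained with new[]. A standalone
sketch of this trailing-storage idiom; the Destroy() helper is illustrative,
not part of this change:

    #include <cstddef>
    #include <cstdint>
    #include <new>

    class RegisterLine {
     public:
      static RegisterLine* Create(size_t num_regs) {
        // One allocation: the object, then num_regs uint16_t slots after it.
        void* memory = operator new(sizeof(RegisterLine) + num_regs * sizeof(uint16_t));
        return new (memory) RegisterLine(num_regs);
      }
      static void Destroy(RegisterLine* rl) {
        rl->~RegisterLine();
        operator delete(rl);  // matches the operator new in Create()
      }
     private:
      explicit RegisterLine(size_t num_regs) : num_regs_(num_regs) {}
      size_t num_regs_;  // register storage follows in the same allocation
    };

    int main() {
      RegisterLine* rl = RegisterLine::Create(8);
      RegisterLine::Destroy(rl);
      return 0;
    }
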
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index 1ce4a04..f0fe934 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -979,7 +979,7 @@
         if (lres == 0x96deff00aa010000L) {
             System.out.println("longShiftTest PASSED");
         } else {
-            System.out.println("longShiftTest FAILED: " + res);
+            System.out.println("longShiftTest FAILED: " + lres);
             failure = true;
         }
 
diff --git a/test/404-optimizing-allocator/expected.txt b/test/404-optimizing-allocator/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/404-optimizing-allocator/expected.txt
diff --git a/test/404-optimizing-allocator/info.txt b/test/404-optimizing-allocator/info.txt
new file mode 100644
index 0000000..930d42f
--- /dev/null
+++ b/test/404-optimizing-allocator/info.txt
@@ -0,0 +1 @@
+Initial tests for the optimizing compiler's register allocator.
diff --git a/test/404-optimizing-allocator/src/Main.java b/test/404-optimizing-allocator/src/Main.java
new file mode 100644
index 0000000..7b31820
--- /dev/null
+++ b/test/404-optimizing-allocator/src/Main.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$reg$ is a marker for the optimizing compiler to ensure
+// that it actually exercises its register allocator.
+
+public class Main {
+  public static void main(String[] args) {
+
+    expectEquals(4, $opt$reg$TestLostCopy());
+    expectEquals(-10, $opt$reg$TestTwoLive());
+    expectEquals(-20, $opt$reg$TestThreeLive());
+    expectEquals(5, $opt$reg$TestFourLive());
+    expectEquals(10, $opt$reg$TestMultipleLive());
+    expectEquals(1, $opt$reg$TestWithBreakAndContinue());
+    expectEquals(-15, $opt$reg$testSpillInIf(5, 6, 7));
+    expectEquals(-567, $opt$reg$TestAgressiveLive1(1, 2, 3, 4, 5, 6, 7));
+    expectEquals(-77, $opt$reg$TestAgressiveLive2(1, 2, 3, 4, 5, 6, 7));
+  }
+
+  public static int $opt$reg$TestLostCopy() {
+    int a = 0;
+    int b = 0;
+    do {
+      b = a;
+      a++;
+    } while (a != 5);
+    return b;
+  }
+
+  public static int $opt$reg$TestTwoLive() {
+    int a = 0;
+    int b = 0;
+    do {
+      a++;
+      b += 3;
+    } while (a != 5);
+    return a - b;
+  }
+
+  public static int $opt$reg$TestThreeLive() {
+    int a = 0;
+    int b = 0;
+    int c = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+    } while (a != 5);
+    return a - b - c;
+  }
+
+  public static int $opt$reg$TestFourLive() {
+    int a = 0;
+    int b = 0;
+    int c = 0;
+    int d = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+      d++;
+    } while (a != 5);
+    return d;
+  }
+
+  public static int $opt$reg$TestMultipleLive() {
+    int a = 0;
+    int b = 0;
+    int c = 0;
+    int d = 0;
+    int e = 0;
+    int f = 0;
+    int g = 0;
+    do {
+      a++;
+      b++;
+      c++;
+      d++;
+      e += 3;
+      f += 2;
+      g += 2;
+    } while (a != 5);
+    return f;
+  }
+
+  public static int $opt$reg$TestWithBreakAndContinue() {
+    int a = 0;
+    int b = 0;
+    do {
+      a++;
+      if (a == 2) {
+        continue;
+      }
+      b++;
+      if (a == 5) {
+        break;
+      }
+    } while (true);
+    return a - b;
+  }
+
+  public static int $opt$reg$testSpillInIf(int a, int b, int c) {
+    int d = 0;
+    int e = 0;
+    if (a == 5) {
+      b++;
+      c++;
+      d += 2;
+      e += 3;
+    }
+
+    return a - b - c - d - e;
+  }
+
+  public static int $opt$reg$TestAgressiveLive1(int a, int b, int c, int d, int e, int f, int g) {
+    int h = a - b;
+    int i = c - d;
+    int j = e - f;
+    int k = 42 + g - a;
+    do {
+      b++;
+      while (k != 1) {
+        --k;
+        ++i;
+        if (i == 9) {
+          ++i;
+        }
+        j += 5;
+      }
+      k = 9;
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static int $opt$reg$TestAgressiveLive2(int a, int b, int c, int d, int e, int f, int g) {
+    int h = a - b;
+    int i = c - d;
+    int j = e - f;
+    int k = 42 + g - a;
+    do {
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static void expectEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected: " + expected + ", got: " + value);
+    }
+  }
+}
diff --git a/test/700-LoadArgRegs/expected.txt b/test/700-LoadArgRegs/expected.txt
new file mode 100644
index 0000000..4908e5b
--- /dev/null
+++ b/test/700-LoadArgRegs/expected.txt
@@ -0,0 +1,75 @@
+11
+21, 22
+31, 32, 33
+41, 42, 43, 44
+51, 52, 53, 54, 55
+61, 62, 63, 64, 65, 66
+71, 72, 73, 74, 75, 76, 77
+81, 82, 83, 84, 85, 86, 87, 88
+91, 92, 93, 94, 95, 96, 97, 98, 99
+101, 102, 103, 104, 105, 106, 107, 108, 109, 110
+111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111
+121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212
+61, 62, 63, 64, 65, 66
+true
+true, false
+true, false, true
+true, false, true, false
+true, false, true, false, true
+true, false, true, false, true, false
+true, false, true, false, true, false, true
+a
+a, b
+a, b, c
+a, b, c, d
+a, b, c, d, e
+a, b, c, d, e, f
+a, b, c, d, e, f, g
+11
+11, b
+11, b, true
+11, b, true, 12
+11, b, true, 12, e
+11, b, true, 12, e, false
+11, b, true, 12, e, false, 13
+1.1
+2.1, 2.2
+3.1, 3.2, 3.3
+4.1, 4.2, 4.3, 4.4
+5.1, 5.2, 5.3, 5.4, 5.5
+6.1, 6.2, 6.3, 6.4, 6.5, 6.6
+7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
+8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8
+9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9
+10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 10.1
+1.01
+2.01, 2.02
+3.01, 3.02, 3.03
+4.01, 4.02, 4.03, 4.04
+5.01, 5.02, 5.03, 5.04, 5.05
+6.01, 6.02, 6.03, 6.04, 6.05, 6.06
+7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07
+8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08
+9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+-1.1, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01
+100011
+100021, 100022
+100031, 100032, 100033
+100041, 100042, 100043, 100044
+100051, 100052, 100053, 100054, 100055
+100061, 100062, 100063, 100064, 100065, 100066
+100071, 100072, 100073, 100074, 100075, 100076, 100077
+100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088
+100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099
+100100100100011
+-11
+-21, -22
+-31, -32, -33
+-41, -42, -43, -44
+-51, -52, -53, -54, -55
+-61, -62, -63, -64, -65, -66
+-71, -72, -73, -74, -75, -76, -77
+-81, -82, -83, -84, -85, -86, -87, -88
+-91, -92, -93, -94, -95, -96, -97, -98, -99
+-1, -91, -92, -93, -94, -95, -96, -97, -98, -99
diff --git a/test/700-LoadArgRegs/info.txt b/test/700-LoadArgRegs/info.txt
new file mode 100644
index 0000000..dcaa46e
--- /dev/null
+++ b/test/700-LoadArgRegs/info.txt
@@ -0,0 +1 @@
+Simple tests for passing int/float/long/double arguments.
diff --git a/test/700-LoadArgRegs/src/Main.java b/test/700-LoadArgRegs/src/Main.java
new file mode 100644
index 0000000..281ab16
--- /dev/null
+++ b/test/700-LoadArgRegs/src/Main.java
@@ -0,0 +1,288 @@
+public class Main {
+
+  static void testI1(int p1) {
+     System.out.println(p1);
+  }
+  static void testI2(int p1, int p2) {
+     System.out.println(p1+", "+p2);
+  }
+  static void testI3(int p1, int p2, int p3) {
+     System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testI4(int p1, int p2, int p3, int p4) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testI5(int p1, int p2, int p3, int p4, int p5) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testI6(int p1, int p2, int p3, int p4, int p5, int p6) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testI7(int p1, int p2, int p3, int p4, int p5, int p6, int p7) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+  static void testI8(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+  }
+  static void testI9(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+  }
+  static void testI10(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+  }
+  static void testI11(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11);
+  }
+  static void testI12(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11, int p12) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11+", "+p12);
+  }
+  void testI6_nonstatic(int p1, int p2, int p3, int p4, int p5, int p6) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+
+  static void testB1(boolean p1) {
+    System.out.println(p1);
+  }
+  static void testB2(boolean p1, boolean p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testB3(boolean p1, boolean p2, boolean p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testB4(boolean p1, boolean p2, boolean p3, boolean p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testB5(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testB6(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testB7(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6, boolean p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testO1(Object p1) {
+    System.out.println(p1);
+  }
+  static void testO2(Object p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testO3(Object p1, Object p2, Object p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testO4(Object p1, Object p2, Object p3, Object p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testO5(Object p1, Object p2, Object p3, Object p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testO6(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testO7(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6, Object p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testIOB1(int p1) {
+    System.out.println(p1);
+  }
+  static void testIOB2(int p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testIOB3(int p1, Object p2, boolean p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testIOB4(int p1, Object p2, boolean p3, int p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testIOB5(int p1, Object p2, boolean p3, int p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testIOB6(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testIOB7(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6, int p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testF1(float p1) {
+    System.out.println(p1);
+  }
+  static void testF2(float p1, float p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testF3(float p1, float p2, float p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testF4(float p1, float p2, float p3, float p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testF5(float p1, float p2, float p3, float p4, float p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testF6(float p1, float p2, float p3, float p4, float p5, float p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testF7(float p1, float p2, float p3, float p4, float p5, float p6, float p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+  static void testF8(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+  }
+  static void testF9(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+  }
+  static void testF10(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9, float p10) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+  }
+
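+  // Doubles occupy two Dex registers apiece; the higher arities below presumably
+  // exhaust the FP argument registers on 64-bit targets and exercise stack passing.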
+  static void testD1 (double p1) { System.out.println(p1); }
+  static void testD2 (double p1, double p2) { System.out.println(p1+", "+p2); }
+  static void testD3 (double p1, double p2, double p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testD4 (double p1, double p2, double p3, double p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testD5 (double p1, double p2, double p3, double p4, double p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testD6 (double p1, double p2, double p3, double p4, double p5, double p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testD7 (double p1, double p2, double p3, double p4, double p5, double p6, double p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testD8 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testD9 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD9f (float p0, double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD10(double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9, double p10) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10); }
+
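+  // Drivers: each family is called at every arity; the literal values encode the
+  // argument's position, so a mis-passed argument is obvious in the output.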
+  static void testI() {
+    testI1(11);
+    testI2(21, 22);
+    testI3(31, 32, 33);
+    testI4(41, 42, 43, 44);
+    testI5(51, 52, 53, 54, 55);
+    testI6(61, 62, 63, 64, 65, 66);
+    testI7(71, 72, 73, 74, 75, 76, 77);
+    testI8(81, 82, 83, 84, 85, 86, 87, 88);
+    testI9(91, 92, 93, 94, 95, 96, 97, 98, 99);
+    testI10(101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+    testI11(111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111);
+    testI12(121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212);
+    new Main().testI6_nonstatic(61, 62, 63, 64, 65, 66);
+  }
+
+  static void testB() {
+    testB1(true);
+    testB2(true, false);
+    testB3(true, false, true);
+    testB4(true, false, true, false);
+    testB5(true, false, true, false, true);
+    testB6(true, false, true, false, true, false);
+    testB7(true, false, true, false, true, false, true);
+  }
+
+  static void testO() {
+    testO1("a");
+    testO2("a", "b");
+    testO3("a", "b", "c");
+    testO4("a", "b", "c", "d");
+    testO5("a", "b", "c", "d", "e");
+    testO6("a", "b", "c", "d", "e", "f");
+    testO7("a", "b", "c", "d", "e", "f", "g");
+  }
+
+  static void testIOB() {
+    testIOB1(11);
+    testIOB2(11, "b");
+    testIOB3(11, "b", true);
+    testIOB4(11, "b", true, 12);
+    testIOB5(11, "b", true, 12, "e");
+    testIOB6(11, "b", true, 12, "e", false);
+    testIOB7(11, "b", true, 12, "e", false, 13);
+  }
+
+  static void testF() {
+    testF1(1.1f);
+    testF2(2.1f, 2.2f);
+    testF3(3.1f, 3.2f, 3.3f);
+    testF4(4.1f, 4.2f, 4.3f, 4.4f);
+    testF5(5.1f, 5.2f, 5.3f, 5.4f, 5.5f);
+    testF6(6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f);
+    testF7(7.1f, 7.2f, 7.3f, 7.4f, 7.5f, 7.6f, 7.7f);
+    testF8(8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f);
+    testF9(9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f);
+    testF10(10.1f, 10.2f, 10.3f, 10.4f, 10.5f, 10.6f, 10.7f, 10.8f, 10.9f, 10.1f);
+  }
+
+  static void testD() {
+    testD1(1.01);
+    testD2(2.01, 2.02);
+    testD3(3.01, 3.02, 3.03);
+    testD4(4.01, 4.02, 4.03, 4.04);
+    testD5(5.01, 5.02, 5.03, 5.04, 5.05);
+    testD6(6.01, 6.02, 6.03, 6.04, 6.05, 6.06);
+    testD7(7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07);
+    testD8(8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08);
+    testD9(9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+    testD9f(-1.1f, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+
+    // TODO: 10.01 as first arg fails: 10.009994506835938
+    testD10(10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01);
+  }
+
+  static void testL1(long p1) { System.out.println(p1); }
+//  static void testL2x(long p1, long p2) { testL2(p1+p2, p2); }  // TODO(64) GenAddLong 64BIT_TEMP
+  static void testL2(long p1, long p2) { System.out.println(p1+", "+p2); }
+  static void testL3(long p1, long p2, long p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testL4(long p1, long p2, long p3, long p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testL5(long p1, long p2, long p3, long p4, long p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testL6(long p1, long p2, long p3, long p4, long p5, long p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testL7(long p1, long p2, long p3, long p4, long p5, long p6, long p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testL8(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testL9(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
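+  // The leading int shifts the register and stack alignment of the nine wide
+  // arguments that follow (testD9f above does the same with a leading float).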
+  static void testL9i(int p0, long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+  static void testL() {
+    // testL2x(100021, 100022);
+    testL1(100011);
+    testL2(100021, 100022);
+    testL3(100031, 100032, 100033);
+    testL4(100041, 100042, 100043, 100044);
+    testL5(100051, 100052, 100053, 100054, 100055);
+    testL6(100061, 100062, 100063, 100064, 100065, 100066);
+    testL7(100071, 100072, 100073, 100074, 100075, 100076, 100077);
+    testL8(100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088);
+    testL9(100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099);
+  }
+
+  static void testLL() {
+    testL1(100100100100011L);
+
+    testL1(-11L);
+    testL2(-21L, -22L);
+    testL3(-31L, -32L, -33L);
+    testL4(-41L, -42L, -43L, -44L);
+    testL5(-51L, -52L, -53L, -54L, -55L);
+    testL6(-61L, -62L, -63L, -64L, -65L, -66L);
+    testL7(-71L, -72L, -73L, -74L, -75L, -76L, -77L);
+    testL8(-81L, -82L, -83L, -84L, -85L, -86L, -87L, -88L);
+    testL9(-91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+    testL9i(-1, -91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+
+    // TODO(64) GenAddLong 64BIT_TEMP
+    // testL2x(100100100100011L, 1L);
+    // testL2x(100100100100011L, 100100100100011L);
+  }
+
+  public static void main(String[] args) throws Exception {
+    testI();
+    testB();
+    testO();
+    testIOB();
+    testF();
+    testD();
+    testL();
+    testLL();
+  }
+}
diff --git a/test/Android.mk b/test/Android.mk
index c15259c..7897449 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -143,8 +143,8 @@
 	mkdir -p /tmp/android-data/test-art-host-oat-default-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-default-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
-	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  LD_LIBRARY_PATH=$(HOST_LIBRARY_PATH) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_LIBRARY_PATH) $(1) $(2) \
           && echo test-art-host-oat-default-$(1) PASSED || (echo test-art-host-oat-default-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-default-$(1)
 
@@ -153,8 +153,8 @@
 	mkdir -p /tmp/android-data/test-art-host-oat-interpreter-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-interpreter-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
-	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  LD_LIBRARY_PATH=$(HOST_LIBRARY_PATH) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_LIBRARY_PATH) $(1) $(2) \
           && echo test-art-host-oat-interpreter-$(1) PASSED || (echo test-art-host-oat-interpreter-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-interpreter-$(1)
 
diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java
index 3c4ed35..33418a9 100644
--- a/test/JniTest/JniTest.java
+++ b/test/JniTest/JniTest.java
@@ -21,6 +21,7 @@
         System.loadLibrary("arttest");
         testFindClassOnAttachedNativeThread();
         testFindFieldOnAttachedNativeThread();
+        testReflectFieldGetFromAttachedNativeThreadNative();
         testCallStaticVoidMethodOnSubClass();
         testGetMirandaMethod();
         testZeroLengthByteBuffers();
@@ -34,6 +35,10 @@
 
     private static boolean testFindFieldOnAttachedNativeThreadField;
 
+    private static native void testReflectFieldGetFromAttachedNativeThreadNative();
+
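+    // Deliberately public: the native test retrieves it via Class.getField,
+    // which only returns public fields.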
+    public static boolean testReflectFieldGetFromAttachedNativeThreadField;
+
     private static void testFindFieldOnAttachedNativeThread() {
       testFindFieldOnAttachedNativeThreadNative();
       if (!testFindFieldOnAttachedNativeThreadField) {
diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc
index 024ba53..36cad72 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/JniTest/jni_test.cc
@@ -103,6 +103,66 @@
   assert(pthread_join_result == 0);
 }
 
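+// Thread entry point for the regression test: attach to the VM, then read the
+// static boolean JniTest.testReflectFieldGetFromAttachedNativeThreadField via
+// java.lang.reflect instead of GetStaticBooleanField.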
+static void* testReflectFieldGetFromAttachedNativeThread(void*) {
+  assert(jvm != NULL);
+
+  JNIEnv* env = NULL;
+  JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL };
+  int attach_result = jvm->AttachCurrentThread(&env, &args);
+  assert(attach_result == 0);
+
+  jclass clazz = env->FindClass("JniTest");
+  assert(clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jclass class_clazz = env->FindClass("java/lang/Class");
+  assert(class_clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jmethodID getFieldMethodId = env->GetMethodID(class_clazz, "getField",
+                                                "(Ljava/lang/String;)Ljava/lang/reflect/Field;");
+  assert(getFieldMethodId != NULL);
+  assert(!env->ExceptionCheck());
+
+  jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField");
+  assert(field_name != NULL);
+  assert(!env->ExceptionCheck());
+
+  jobject field = env->CallObjectMethod(clazz, getFieldMethodId, field_name);
+  assert(field != NULL);
+  assert(!env->ExceptionCheck());
+
+  jclass field_clazz = env->FindClass("java/lang/reflect/Field");
+  assert(field_clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jmethodID getBooleanMethodId = env->GetMethodID(field_clazz, "getBoolean",
+                                                  "(Ljava/lang/Object;)Z");
+  assert(getBooleanMethodId != NULL);
+  assert(!env->ExceptionCheck());
+
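+  // Field.getBoolean ignores its receiver argument for static fields, so any
+  // object (here the class itself) can be passed.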
+  jboolean value = env->CallBooleanMethod(field, getBooleanMethodId, /* ignored */ clazz);
+  assert(value == false);
+  assert(!env->ExceptionCheck());
+
+  int detach_result = jvm->DetachCurrentThread();
+  assert(detach_result == 0);
+  return NULL;
+}
+
+// http://b/15539150
+extern "C" JNIEXPORT void JNICALL Java_JniTest_testReflectFieldGetFromAttachedNativeThreadNative(
+    JNIEnv*, jclass) {
+  pthread_t pthread;
+  int pthread_create_result = pthread_create(&pthread,
+                                             NULL,
+                                             testReflectFieldGetFromAttachedNativeThread,
+                                             NULL);
+  assert(pthread_create_result == 0);
+  int pthread_join_result = pthread_join(pthread, NULL);
+  assert(pthread_join_result == 0);
+}
 
 // http://b/11243757
 extern "C" JNIEXPORT void JNICALL Java_JniTest_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index 48a6c61..87187ed 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -33,8 +33,8 @@
 
 namespace art {
 
-#define IS_IN_REF_BITMAP(mh, ref_bitmap, reg) \
-    (((reg) < mh.GetCodeItem()->registers_size_) && \
+#define IS_IN_REF_BITMAP(ref_bitmap, reg) \
+    (((reg) < m->GetCodeItem()->registers_size_) && \
      ((*((ref_bitmap) + (reg)/8) >> ((reg) % 8) ) & 0x01))
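+// Note: the reworked macro reads 'm' (the current method) from the enclosing
+// scope instead of going through a MethodHelper.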
 
 #define CHECK_REGS_CONTAIN_REFS(...)     \
@@ -42,7 +42,7 @@
     int t[] = {__VA_ARGS__};             \
     int t_size = sizeof(t) / sizeof(*t);      \
     for (int i = 0; i < t_size; ++i)          \
-      CHECK(IS_IN_REF_BITMAP(mh, ref_bitmap, t[i])) \
+      CHECK(IS_IN_REF_BITMAP(ref_bitmap, t[i])) \
           << "Error: Reg @ " << i << "-th argument is not in GC map"; \
   } while (false)
 
@@ -67,8 +67,7 @@
     }
 
     const uint8_t* ref_bitmap = NULL;
-    MethodHelper mh(m);
-    std::string m_name(mh.GetName());
+    std::string m_name(m->GetName());
 
     // Given the method name and the number of times the method has been called,
     // we know the Dex registers with live reference values. Assert that what we
diff --git a/test/SignalTest/signaltest.cc b/test/SignalTest/signaltest.cc
index b84e395..dfe3197 100644
--- a/test/SignalTest/signaltest.cc
+++ b/test/SignalTest/signaltest.cc
@@ -46,7 +46,7 @@
   action.sa_sigaction = signalhandler;
   sigemptyset(&action.sa_mask);
   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__mips__)
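+// struct sigaction has no sa_restorer field on Mac OS (or on MIPS).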
+#if !defined(__APPLE__) && !defined(__mips__)
   action.sa_restorer = nullptr;
 #endif
 
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index 3cf2d0b..c849c54 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -30,15 +30,15 @@
 
 namespace art {
 
-#define REG(mh, reg_bitmap, reg) \
-    (((reg) < mh.GetCodeItem()->registers_size_) && \
+#define REG(reg_bitmap, reg) \
+    (((reg) < m->GetCodeItem()->registers_size_) && \
      ((*((reg_bitmap) + (reg)/8) >> ((reg) % 8) ) & 0x01))
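+// As in ReferenceMap, the macro now relies on a local 'm' being in scope at the
+// call site rather than on a MethodHelper argument.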
 
 #define CHECK_REGS(...) if (!IsShadowFrame()) { \
     int t[] = {__VA_ARGS__}; \
     int t_size = sizeof(t) / sizeof(*t); \
     for (int i = 0; i < t_size; ++i) \
-      CHECK(REG(mh, reg_bitmap, t[i])) << "Error: Reg " << i << " is not in RegisterMap"; \
+      CHECK(REG(reg_bitmap, t[i])) << "Error: Reg " << i << " is not in RegisterMap"; \
   }
 
 static int gJava_StackWalk_refmap_calls = 0;
@@ -64,8 +64,7 @@
       NativePcOffsetToReferenceMap map(m->GetNativeGcMap());
       reg_bitmap = map.FindBitMap(GetNativePcOffset());
     }
-    MethodHelper mh(m);
-    StringPiece m_name(mh.GetName());
+    StringPiece m_name(m->GetName());
 
     // Given the method name and the number of times the method has been called,
     // we know the Dex registers with live reference values. Assert that what we
diff --git a/tools/Android.mk b/tools/Android.mk
index 6c385dc..d3be17f 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -16,7 +16,6 @@
 
 LOCAL_PATH := $(call my-dir)
 
-ifeq ($(WITH_HOST_DALVIK),true)
 # Copy the art shell script to the host's bin directory
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
@@ -28,5 +27,3 @@
 	@echo "Copy: $(PRIVATE_MODULE) ($@)"
 	$(copy-file-to-new-target)
 	$(hide) chmod 755 $@
-
-endif